Complete Examples
Use MarkovML to power your training and testing process and get the best out of them.
Sentiment Classifier
In this example, we'll train and evaluate a sentiment classifier on a sample extracted from a 1.6 million tweet dataset available here.
Pre-Requisites
We recommend running this example in a virtual environment. We also recommend using Anaconda for machine learning work.
- Please complete the setup of your machine.
- Install Pandas, scikit-learn, Keras, and TensorFlow
pip install pandas scikit-learn keras tensorflow
- Create a folder named data and download the sample file from here into it. Divide this file into two parts: twitter_train_dataset.csv and twitter_test_dataset.csv. In the program below we use an 80-20 train-to-test ratio.
Sample Code
import random
import time
import pandas as pd
from keras import Sequential, layers
from keras.layers import Dropout
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
import markov
from markov.api.schemas.model_recording import SingleTagInferenceRecord, RecordCustomMetric
# GET PROJECT
# The sample authors created a project for this model ahead of time and the id
# below refers to it. Replace it with the id of your own project, which you can
# create from the MarkovML SDK or the web UI.
project = markov.Project.get_by_id("t86dPNewEhKwkT")

# GET DATASET
# The same train and test segments were also uploaded to MarkovML with
# dataset id: 3vRT5Ut6mhPqFGc23
train_data_location = "./data/twitter_train_dataset.csv"  # location of your train split
test_data_location = "./data/twitter_test_dataset.csv"  # location of your test split

# read data into dataframes
train_df = pd.read_csv(train_data_location, encoding='latin')
test_df = pd.read_csv(test_data_location, encoding='latin')

# concatenate the data so the vectorizer's vocabulary covers both splits
data = pd.concat([train_df, test_df])

# train a count vectorizer
# Fit on the tweet text column. Iterating a DataFrame yields its column labels,
# so fitting on `data` itself would learn a vocabulary made of column names only.
tf_vec = CountVectorizer()
tf_vec.fit(data['text'])

x_train, y_train = train_df['text'].values.tolist(), train_df['target'].values
x_test, y_test = test_df['text'].values.tolist(), test_df['target'].values

# transform the raw text to count vectors
x_train_trans = tf_vec.transform(x_train)
x_test_trans = tf_vec.transform(x_test)

# BUILD MODEL
# Train your MODEL
# The timestamp suffix keeps the model / evaluation names unique across runs.
suffix = int(time.time())
MODEL_NAME = f"Keras Model for Twitter Sentiment Analysis {suffix}"
# build a Keras Network
def _build_model(input_dim):
    """Build and compile the dense network used for sentiment classification.

    Args:
        input_dim: Number of input features per sample; the caller passes the
            column count of the vectorized training matrix.

    Returns:
        A compiled Keras ``Sequential`` model ending in a single sigmoid unit,
        using binary cross-entropy loss, the Adam optimizer and an accuracy
        metric.
    """
    model = Sequential()
    model.add(layers.Dense(64, input_dim=input_dim, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(layers.Dense(16, activation='relu'))
    model.add(Dropout(0.5))
    # single sigmoid output: the probability of the positive class
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model
# Build the Keras model; the input width is the number of vectorized features.
model = _build_model(x_train_trans.shape[1])

# TRACK THE TRAINING EXPERIMENT USING markovml
# auto_record from markovml captures this experiment.
experiment_notes = (
    f"Auto Recording Keras Model with Name: {MODEL_NAME} with Sentence Encoder"
)
markov.keras.auto_record(
    name=MODEL_NAME,
    notes=experiment_notes,
    project_id=project.project_id,
    model_class=markov.ModelClass.TAGGING,
)

# Train the model (it will take some time to converge!)
model.fit(x_train_trans, y_train, epochs=50, batch_size=32, verbose=False)

# EVALUATE THE MODEL
y_pred = model.predict(x_test_trans)
# Keep the raw probabilities: y_pred is thresholded in place just below.
orig_copy = y_pred.tolist()
y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0

# print test accuracy report
acc = accuracy_score(y_test, y_pred)
print("Test accuracy:", acc)

# Register with MarkovML Backend
# Record the results with MarkovML Evaluator
evaluation_notes = (
    "This model evaluation captures the performance of V1 model"
    " against baseline dataset for sentiment analysis"
)
evaluation_recorder = markov.EvaluationRecorder(
    name=f"Sentiment Analysis Keras Model Evaluation {suffix}",
    model_id=model.markov_model_id,
    notes=evaluation_notes,
    dataset_id="3vRT5Ut6mhPqFGc23",
)
evaluation_recorder.register()
def _get_cost(inferred, actual):
if actual == inferred:
return 0
else:
return random.randint(2, 5)
# Add one inference record per test sample. Record ids (urid) start at 1 and
# increase by one per sample.
for urid, (prob, pred, orig) in enumerate(zip(orig_copy, y_pred, y_test), start=1):
    # pred and prob each hold the single output value for this sample
    inferred = float(pred[0])
    actual = float(orig)
    score = float(prob[0])
    mi_record = SingleTagInferenceRecord(
        inferred=inferred,
        actual=actual,
        urid=urid,
        score=score,
        custom_metrics=[
            RecordCustomMetric(label="Cost", value=_get_cost(inferred, actual)),
            RecordCustomMetric(label="Probability", value=score),
        ],
    )
    evaluation_recorder.add_record(mi_record)

# Finish the recording once every record has been added, then show the outcome.
outcome = evaluation_recorder.finish()
print(outcome)
MNIST Data Classifier
In this example, we will create a Keras DNN model that classifies the images in the MNIST dataset as numerical digits.
We will take the following steps:
- Use an existing project we created for working with MNIST dataset.
- Create an experiment to track the training process where we will record the hyper-parameters, loss curve, epoch time, CPU stats etc.
- Evaluate the trained model against the test dataset
Pre-Requisites
We recommend running this example in a virtual environment. We also recommend using Anaconda for machine learning work.
- Please complete the setup of your machine.
- Install Pandas, scikit-learn, Keras, and TensorFlow
pip install pandas scikit-learn keras tensorflow
Sample Code
import time
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils import to_categorical
import markov
from markov.api.schemas.model_recording import SingleTagInferenceRecord
# Load an existing project by name, creating and registering it if it does not
# exist yet. You can find the list of projects here: `app.markovml.com/<workspace_id>/proj`
# This is an optional step, you can directly use the project id in experimentation and evaluation of your model
project_name = "MNIST Project"
try:
    mnist_project = markov.Project.get_by_name(project_name)
except markov.exceptions.ResourceNotFoundException:
    # no project with this name yet: create one and register it
    mnist_project = markov.Project(name=project_name)
    mnist_project.register()

# Load your dataset, in this case we are using the keras library to load the dataset. You can load it in any way.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# length of the flat feature vector each image is reshaped into
IMAGE_INPUT_SHAPE = 784

# Minor data pre-processing
# reshape, convert to float and normalize to send standard input into the DNN
x_train = x_train.reshape(-1, IMAGE_INPUT_SHAPE).astype("float32") / 255.0
x_test = x_test.reshape(-1, IMAGE_INPUT_SHAPE).astype("float32") / 255.0

num_classes = 10  # since there are 10 digits in which we are classifying the images to

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
# build the model that will be used to classify the MNIST images
def _build_model_graph(input_shape=(IMAGE_INPUT_SHAPE,), num_classes=10):
    """Build and compile the dense network used to classify MNIST images.

    Args:
        input_shape: Shape of one input sample; defaults to a flat vector of
            ``IMAGE_INPUT_SHAPE`` values.
        num_classes: Width of the softmax output layer. Defaults to 10, the
            value the original sample hard-coded.

    Returns:
        A compiled Keras ``Sequential`` model using categorical cross-entropy
        loss, the RMSprop optimizer and an accuracy metric.
    """
    model = Sequential()
    model.add(Dense(512, activation="relu", input_shape=input_shape))
    model.add(Dense(512, activation="relu"))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(
        loss="categorical_crossentropy",
        optimizer=RMSprop(),
        metrics=["accuracy"],
    )
    return model
# A timestamp in the name keeps each run's experiment distinguishable.
run_stamp = int(time.time())
MODEL_NAME = f"Classification of MNIST Dataset using Keras DNN {run_stamp}"

# auto_record will automatically track the experiment - including its
# hyper-parameters, loss curve, epoch time etc.
experiment_notes = (
    "This experiment is used to track the training process of the Keras DNN used for classification of MNIST."
)
markov.keras.auto_record(
    name=MODEL_NAME,
    notes=experiment_notes,
    # you can simply paste the project_id here as well
    project_id=mnist_project.project_id,
)

model = _build_model_graph()

# The training process will automatically be tracked as we used "auto_record" above.
model.fit(x_train, y_train, batch_size=128, epochs=5)
# fetch dataset to be used for evaluation
dataset = markov.dataset.get_by_name(dataset_name="paste_dataset_name_here")

# Now let us evaluate this model against (x_test, y_test)
evaluation_recorder = markov.EvaluationRecorder(
    name=f"Evaluate {MODEL_NAME}",
    model_id=model.markov_model_id,
    project_id=mnist_project.project_id,
    dataset_id=dataset.ds_id,  # or directly paste the dataset-id from UI
)
evaluation_recorder.register()

y_pred = model.predict(x_test)

# Add one record per test sample; record ids (urid) start at 1.
for urid, (pred, actual) in enumerate(zip(y_pred, y_test), start=1):
    evaluation_record = SingleTagInferenceRecord(
        # index of the highest score is the predicted digit
        inferred=pred.argmax().item(),
        # y_test is one-hot encoded, so argmax recovers the true digit
        actual=actual.argmax().item(),
        score=pred.max().item(),
        urid=urid,
    )
    evaluation_recorder.add_record(evaluation_record)

# Finish the recording once every record has been added.
outcome = evaluation_recorder.finish()
Make sure to use your own project_name, and paste your own dataset_name in the above code sample.
Updated 4 months ago