Complete Examples

Use MarkovML to power your training and testing process and get the best out of them.

Sentiment Classifier

In this example, we'll train and evaluate a sentiment classifier on a sample extracted from a 1.6 million tweet dataset available here.

Pre-Requisites

We recommend running this example in a virtual environment. We also recommend using Anaconda for machine learning.

  1. Please complete the setup of your machine.
  2. Install Pandas, scikit-learn, Keras, and TensorFlow
pip install pandas scikit-learn keras tensorflow
  3. Create a folder named data and download the sample file from here into it. Divide this file into two parts: twitter_train_dataset.csv and twitter_test_dataset.csv. In the program below we have used an 80-20 train-to-test ratio.

Sample Code

import random
import time

import pandas as pd
from keras import Sequential, layers
from keras.layers import Dropout
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

import markov
from markov.api.schemas.model_recording import SingleTagInferenceRecord, RecordCustomMetric

# GET PROJECT
# Fetch the MarkovML project that this model will be recorded under.
# You can create a new project from the MarkovML SDK or the web UI; replace the
# ID below with the ID of your own project.
# NOTE(review): this walkthrough previously quoted project_id 4FzUBTJv8f9uPb in
# this comment, which does not match the ID passed below -- confirm which one
# is intended.
project = markov.Project.get_by_id("t86dPNewEhKwkT")

# GET DATASET
# We have uploaded the following train and test segments to markov as well with dataset id: 3vRT5Ut6mhPqFGc23
train_data_location = "./data/twitter_train_dataset.csv"  # location of your train split
test_data_location = "./data/twitter_test_dataset.csv"  # location of your test split

# read data into dataframes
train_df = pd.read_csv(train_data_location, encoding='latin')
test_df = pd.read_csv(test_data_location, encoding='latin')

# concatenate the data so the vocabulary is built from both splits together
data = pd.concat([train_df, test_df])

# train a count vectorizer
# Fit on the 'text' column only: iterating a DataFrame yields its column names,
# so passing the frame itself would build the vocabulary from the headers
# instead of from the tweets.
tf_vec = CountVectorizer()
tf_vec.fit(data['text'])
x_train, y_train = train_df['text'].values.tolist(), train_df['target'].values
x_test, y_test = test_df['text'].values.tolist(), test_df['target'].values

# transform the raw text into count vectors using the fitted vocabulary
x_train_trans = tf_vec.transform(x_train)
x_test_trans = tf_vec.transform(x_test)

# BUILD MODEL
# Train your MODEL
# The timestamp suffix gives every run a distinct model / evaluation name.
suffix = int(time.time())
MODEL_NAME = f"Keras Model for Twitter Sentiment Analysis {suffix}"


# build a Keras Network
def _build_model(input_dim):
    """Assemble and compile the feed-forward sentiment classifier.

    Args:
        input_dim: Number of input features, passed to the first Dense layer.

    Returns:
        A compiled ``Sequential`` model ending in a single sigmoid unit,
        using binary cross-entropy loss, the adam optimizer and the accuracy
        metric.
    """
    # Layers are listed top-to-bottom in the order they are stacked.
    stack = [
        layers.Dense(64, input_dim=input_dim, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        Dropout(0.5),
        layers.Dense(16, activation='relu'),
        Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


# build Keras Model
# The input width is the number of columns produced by the count vectorizer.
model = _build_model(x_train_trans.shape[1])

# TRACK THE TRAINING EXPERIMENT USING markovml
# add auto_record from markovml to capture this experiment
# It is called before model.fit() so that the training run below is recorded.
# NOTE(review): the notes string mentions a "Sentence Encoder", but the features
# in this script come from a CountVectorizer -- confirm the wording.
markov.keras.auto_record(
    name=MODEL_NAME,
    notes=f"Auto Recording Keras Model with Name: {MODEL_NAME} with Sentence Encoder",
    project_id=project.project_id,
    model_class=markov.ModelClass.TAGGING
)

# Train the model (it will take some time to converge!)
model.fit(x_train_trans, y_train, epochs=50, batch_size=32, verbose=False)

# EVALUATE THE MODEL
# print test accuracy report
y_pred = model.predict(x_test_trans)
# Snapshot the raw model outputs first: the next two lines overwrite y_pred in
# place with hard 0/1 labels, and the raw values are needed later as scores.
orig_copy = y_pred.tolist()
y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0
acc = accuracy_score(y_test, y_pred)
print("Test accuracy:", acc)

# Register with MarkovML Backend
# Record the results with MarkovML Evaluator
# presumably model.markov_model_id is attached to the model by auto_record
# above -- verify against the MarkovML SDK documentation.
evaluation_recorder = markov.EvaluationRecorder(
    name=f"Sentiment Analysis Keras Model Evaluation {suffix}",
    model_id=model.markov_model_id,
    notes="This model evaluation captures the performance of V1 model"
          " against baseline dataset for sentiment analysis",
    dataset_id="3vRT5Ut6mhPqFGc23"
)

# The recorder is registered before any inference records are added to it.
evaluation_recorder.register()


def _get_cost(inferred, actual):
    """Return an illustrative cost for a single prediction.

    A prediction that equals the actual label costs 0; any other prediction is
    assigned a random integer cost between 2 and 5 (both inclusive).
    """
    return 0 if actual == inferred else random.randint(2, 5)


# Add one inference record per test example, then finish the evaluation.
# enumerate(start=1) numbers the records 1..N; the previous counter was
# initialised to 1 but incremented before its first use, so ids started at 2.
for urid, (prob, pred, orig) in enumerate(zip(orig_copy, y_pred, y_test), start=1):
    # Convert once and reuse: each value feeds both the record and its metrics.
    inferred = float(pred[0])  # thresholded 0/1 prediction
    actual = float(orig)  # label from the test set
    score = float(prob[0])  # raw model output kept before thresholding

    mi_record = SingleTagInferenceRecord(
        inferred=inferred,
        actual=actual,
        urid=urid,
        score=score,
        custom_metrics=[
            RecordCustomMetric(label="Cost", value=_get_cost(inferred, actual)),
            RecordCustomMetric(label="Probability", value=score)
        ]
    )
    evaluation_recorder.add_record(mi_record)

outcome = evaluation_recorder.finish()
print(outcome)

MNIST Data Classifier

In this example, we will create a Keras DNN model to classify the images in the MNIST dataset as numerical digits.
We will take the following steps:

  1. Use an existing project we created for working with MNIST dataset.
  2. Create an experiment to track the training process where we will record the hyper-parameters, loss curve, epoch time, CPU stats etc.
  3. Evaluate the trained model against the test dataset

Pre-Requisites

We recommend running this example in a virtual environment. We also recommend using Anaconda for machine learning.

  1. Please complete the setup of your machine.
  2. Install Pandas, scikit-learn, Keras, and TensorFlow
pip install pandas scikit-learn keras tensorflow

Sample Code

import time

from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils import to_categorical

import markov
from markov.api.schemas.model_recording import SingleTagInferenceRecord

# Load an existing project by name; if no project with that name exists yet, create and register it.
# You can find the list of projects here: `app.markovml.com/<workspace_id>/proj`
# This is an optional step, you can directly use the project id in experimentation and evaluation of your model
project_name = "MNIST Project"
try:
    mnist_project = markov.Project.get_by_name(project_name)
except markov.exceptions.ResourceNotFoundException:
    mnist_project = markov.Project(name=project_name)
    mnist_project.register()

# Load your dataset, in this case we are using the keras library to load the dataset. You can load it in any way.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Length of one flattened image vector; also the model's default input size.
IMAGE_INPUT_SHAPE = 784

# Minor data pre-processing
# reshape, convert to float and normalize to send standard input into the DNN
x_train = x_train.reshape(-1, IMAGE_INPUT_SHAPE).astype("float32") / 255.0
x_test = x_test.reshape(-1, IMAGE_INPUT_SHAPE).astype("float32") / 255.0

num_classes = 10  # since there are 10 digits in which we are classifying the images to

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)


# build the model that will be used to classify the MNIST images
def _build_model_graph(input_shape=(IMAGE_INPUT_SHAPE,)):
    """Build and compile the dense network used to classify the MNIST images.

    Args:
        input_shape: Shape passed to the first Dense layer as ``input_shape``.
            Defaults to ``(IMAGE_INPUT_SHAPE,)``.

    Returns:
        A compiled ``Sequential`` model: two 512-unit ReLU layers followed by
        a 10-unit softmax layer, using categorical cross-entropy loss, the
        RMSprop optimizer and the accuracy metric.
    """
    # The body is indented with spaces only; mixing a tab-indented first line
    # with space-indented lines makes Python reject the function.
    model = Sequential()
    model.add(Dense(512, activation="relu", input_shape=input_shape))
    model.add(Dense(512, activation="relu"))
    model.add(Dense(10, activation="softmax"))
    model.compile(
        loss="categorical_crossentropy", optimizer=RMSprop(), metrics=["accuracy"]
    )

    return model


# The timestamp suffix gives every run a distinct model name.
MODEL_NAME = f"Classification of MNIST Dataset using Keras DNN {int(time.time())}"


# auto_record will automatically track the experiment - including its hyper-parameters, loss curve, epoch time etc.
# It is called before the model is built and trained below.
markov.keras.auto_record(
  name=MODEL_NAME,
  notes="This experiment is used to track the training process of the Keras DNN used for classification of MNIST.",
  project_id=mnist_project.project_id,  # you can simply paste the project_id here as well
)

# Build the network with its default input shape.
model = _build_model_graph()

# The training process will automatically be tracked as we used "auto_record" above.
model.fit(x_train, y_train, batch_size=128, epochs=5)

# fetch dataset to be used for evaluation
# "paste_dataset_name_here" is a placeholder: replace it with your own dataset name.
dataset = markov.dataset.get_by_name(dataset_name="paste_dataset_name_here")

# Now let us evaluate this model against (x_test, y_test)
# presumably model.markov_model_id is attached to the model by auto_record
# above -- verify against the MarkovML SDK documentation.
evaluation_recorder = markov.EvaluationRecorder(
  name=f"Evaluate {MODEL_NAME}",
  model_id=model.markov_model_id,
  project_id=mnist_project.project_id,
  dataset_id=dataset.ds_id   # or directly paste the dataset-id from UI
)

# The recorder is registered before any inference records are added to it.
evaluation_recorder.register()

# Run the trained model over the test images.
y_pred = model.predict(x_test)

# Add one inference record per test sample. `urid` is a running record id that
# starts at 1 and is advanced after each record is added.
urid = 1
for class_scores, one_hot_label in zip(y_pred, y_test):
    evaluation_recorder.add_record(
        SingleTagInferenceRecord(
            # index of the highest model output = predicted class
            inferred=class_scores.argmax().item(),
            # y_test was converted with to_categorical, so argmax recovers the label
            actual=one_hot_label.argmax().item(),
            score=class_scores.max().item(),
            urid=urid,
        )
    )
    urid += 1

outcome = evaluation_recorder.finish()

Make sure to use your own project_name, and paste your own dataset_name in the above code sample.