Learn how to train a simple CNN in PyTorch and how to convert it to ONNX for deployment.
July 26, 2024
07m Read
By: Abhilaksh Singh Reen
In this article, we'll create a very basic CNN (Convolutional Neural Network) using PyTorch that will serve as a baseline for the various deployment methods we'll be going over in future Blog Posts. Today's model will recognize handwritten digits (0 to 9) and will be trained on the MNIST dataset. We'll also export our model to ONNX so that it can be used from any language that has an ONNX Runtime available (JavaScript, for example).
Here's the directory structure of the codebase we'll be creating.
│   config.yaml
│   requirements.txt
│
├───data
├───models
└───src
    │   config.py
    │   dataset.py
    │   model.py
    │   test.py
    │   train.py
    │
    └───converters
            converter_onnx.py
We have the config and requirements files, which define the training and testing configuration and the project's required packages, respectively.
The data directory will be used to store the dataset that PyTorch downloads for us, and we'll store our model weights in the models directory. The src directory contains our codebase.
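The requirements.txt file itself isn't shown in this post; here's a minimal sketch of what it might contain, based on the imports we use below (onnx and onnxscript are needed for the ONNX export at the end, and onnxruntime only if you want to run the exported model from Python):
torch
torchvision
opencv-python
pyyaml
onnx
onnxscript
onnxruntime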
Let's create the config.yaml file to define the training and testing config.
training_num_epochs: 10
training_save_weights_epoch_interval: 1
training_batch_size: 100
learning_rate: 0.001
testing_batch_size: 100
log_batch_interval: 100
Next, we create the src/config.py file that loads this configuration into Python and also defines some directory paths for our convenience.
from os.path import dirname, join as path_join
from yaml import FullLoader as yaml_FullLoader, load as yaml_load
config_file_path = path_join(dirname(dirname(__file__)), "config.yaml")
with open(config_file_path, "r") as model_params_file:
    config = yaml_load(model_params_file, Loader=yaml_FullLoader)
models_dir = path_join(dirname(dirname(__file__)), "models")
data_dir = path_join(dirname(dirname(__file__)), "data")
Inside the src folder, create a file called dataset.py, in which we will define our data loaders.
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from .config import config

train_data = MNIST(
    root="data",
    train=True,
    transform=ToTensor(),
    download=True
)

test_data = MNIST(
    root="data",
    train=False,
    transform=ToTensor(),
    download=True
)

data_loaders = {
    "train": DataLoader(
        train_data,
        batch_size=config['training_batch_size'],
        shuffle=True,
        num_workers=1
    ),
    "test": DataLoader(
        test_data,
        batch_size=config['testing_batch_size'],
        shuffle=True,
        num_workers=1
    ),
}
The torchvision package gives us easy access to the MNIST dataset through torchvision.datasets.MNIST. We load the training and testing splits, selected by the train parameter passed to the MNIST constructor, and apply the ToTensor transform to the data.
Then, we define our data loaders which are instances of torch.utils.data.DataLoader: one for training and another for testing. Note how the training and testing batch sizes are being loaded from the config.
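As a quick sanity check (a throwaway snippet, not part of the codebase), you can pull a single batch from the training loader and inspect its shape; with the batch size from our config, you should see a [100, 1, 28, 28] image tensor and a [100] label tensor.
# Run from the project root, e.g. in a scratch script
from src.dataset import data_loaders

images, labels = next(iter(data_loaders["train"]))
print(images.shape)  # expected: torch.Size([100, 1, 28, 28])
print(labels.shape)  # expected: torch.Size([100])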
Next up, let's create the architecture of our CNN. In the src folder, create a new file called model.py.
import torch
import torch.nn as nn
import torch.optim as optim


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(20 * 4 * 4, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.dropout(x)
        x = x.view(-1, 20 * 4 * 4)  # Flatten the tensor
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return torch.softmax(x, dim=1)


# Create an instance of the CNN model
model = CNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
We create a simple CNN with two convolutional layers, a max-pooling layer that we apply after each convolution, a dropout layer, and two fully-connected layers. We use Cross Entropy as our loss function and the Adam optimizer.
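The 20 * 4 * 4 input size of fc1 follows from the shapes: a 28x28 image becomes 24x24 after conv1 (5x5 kernel, no padding), 12x12 after pooling, 8x8 after conv2, and 4x4 after the second pooling, with 20 channels. A quick way to confirm the output shape (again, a throwaway snippet assuming it's run from the project root):
import torch

from src.model import CNN

model = CNN()
dummy_input = torch.randn(1, 1, 28, 28)  # one fake MNIST-sized image
output = model(dummy_input)
print(output.shape)  # expected: torch.Size([1, 10]) -- one probability per digit class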
In the same directory, create a file called train.py. Calling this file will train our model. Inside this file, we make the required imports and then define a class called ModelTrainer.
from datetime import datetime
from os import makedirs
from os.path import join

import torch
import torch.nn as nn
import torch.optim as optim

from .config import config, models_dir
from .dataset import data_loaders
from .model import CNN


class ModelTrainer:
    def __init__(self):
        self.training_id = "torch---" + datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = CNN()
        self.model = self.model.to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=config['learning_rate'])
        self.loss_fn = nn.CrossEntropyLoss()

        self.models_dir = join(models_dir, self.training_id)
        makedirs(self.models_dir, exist_ok=False)

    def save_weights(self, file_path):
        torch.save(self.model.state_dict(), file_path)
In the constructor, we generate a unique training ID, instantiate the model, the optimizer, and the loss function, move the model to our device (CPU or GPU), and create a new directory inside our models dir for this particular training ID. We also define a save_weights method that saves the model's weights to the provided file path; we'll use it to save the weights after every training epoch.
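For reference, loading one of these checkpoints back is just the reverse operation; here's a minimal sketch (the path is a placeholder, and test.py below does this properly):
import torch

from src.model import CNN

model = CNN()
# Placeholder path: substitute the training ID and epoch you want to load
state_dict = torch.load("models/torch---<training_id>/epoch-9.pt", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()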
Now let's create the method that trains our model for a single epoch.
    def train_single_epoch(self):
        self.model.train()

        for batch_index, (data, target) in enumerate(data_loaders["train"]):
            data = data.to(self.device)
            target = target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.loss_fn(output, target)
            loss.backward()
            self.optimizer.step()

            if batch_index % config['log_batch_interval'] == 0:
                print(f" {batch_index} / {len(data_loaders['train'])}")
This is the standard training strategy. The last couple of lines are just for logging the training progress at the specified interval.
Now, we want to create a function that can test the model on our test data and get us the loss and accuracy.
    def test(self):
        self.model.eval()

        sum_test_loss = 0
        num_correct = 0
        with torch.no_grad():
            for data, target in data_loaders["test"]:
                data = data.to(self.device)
                target = target.to(self.device)

                output = self.model(data)

                sum_test_loss += self.loss_fn(output, target).item()

                prediction = output.argmax(dim=1, keepdim=True)
                num_correct += prediction.eq(target.view_as(prediction)).sum().item()

        average_test_loss = sum_test_loss / len(data_loaders['test'].dataset)
        accuracy = num_correct / len(data_loaders['test'].dataset)

        print(f" Average loss: {average_test_loss}")
        print(f" Accuracy: {accuracy}")
For all the testing samples, we calculate the average loss and the number of predictions the model got correct and then print them to the console.
And, finally, we can define our train function, which just trains our model for the specified number of epochs.
    def train(self):
        for epoch_index in range(config['training_num_epochs']):
            print(f"Epoch: {epoch_index}")

            self.train_single_epoch()

            model_save_file_path = join(self.models_dir, f"epoch-{epoch_index}.pt")
            self.save_weights(model_save_file_path)

            self.test()
Outside the ModelTrainer class, in the main block, we can create an instance of the class and call the train function.
if __name__ == "__main__":
    model_trainer = ModelTrainer()
    model_trainer.train()
Here's the entire train.py file.
from datetime import datetime
from os import makedirs
from os.path import join

import torch
import torch.nn as nn
import torch.optim as optim

from .config import config, models_dir
from .dataset import data_loaders
from .model import CNN


class ModelTrainer:
    def __init__(self):
        self.training_id = "torch---" + datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = CNN()
        self.model = self.model.to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=config['learning_rate'])
        self.loss_fn = nn.CrossEntropyLoss()

        self.models_dir = join(models_dir, self.training_id)
        makedirs(self.models_dir, exist_ok=False)

    def train_single_epoch(self):
        self.model.train()

        for batch_index, (data, target) in enumerate(data_loaders["train"]):
            data = data.to(self.device)
            target = target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.loss_fn(output, target)
            loss.backward()
            self.optimizer.step()

            if batch_index % config['log_batch_interval'] == 0:
                print(f" {batch_index} / {len(data_loaders['train'])}")

    def test(self):
        self.model.eval()

        sum_test_loss = 0
        num_correct = 0
        with torch.no_grad():
            for data, target in data_loaders["test"]:
                data = data.to(self.device)
                target = target.to(self.device)

                output = self.model(data)

                sum_test_loss += self.loss_fn(output, target).item()

                prediction = output.argmax(dim=1, keepdim=True)
                num_correct += prediction.eq(target.view_as(prediction)).sum().item()

        average_test_loss = sum_test_loss / len(data_loaders['test'].dataset)
        accuracy = num_correct / len(data_loaders['test'].dataset)

        print(f" Average loss: {average_test_loss}")
        print(f" Accuracy: {accuracy}")

    def save_weights(self, file_path):
        torch.save(self.model.state_dict(), file_path)

    def train(self):
        for epoch_index in range(config['training_num_epochs']):
            print(f"Epoch: {epoch_index}")

            self.train_single_epoch()

            model_save_file_path = join(self.models_dir, f"epoch-{epoch_index}.pt")
            self.save_weights(model_save_file_path)

            self.test()


if __name__ == "__main__":
    model_trainer = ModelTrainer()
    model_trainer.train()
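Since train.py uses package-relative imports, it should be run as a module from the project root, something like:
python -m src.train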
Still inside the src folder, we create another file called test.py. Let's import the required packages and load our model.
from json import load as json_load
from os import listdir
from os.path import join as path_join

import cv2
import torch
from torchvision import transforms

from .config import data_dir, models_dir
from .dataset import data_loaders
from .model import CNN

# MODEL_WEIGHTS_FILE_PATH = path_join(models_dir, "<training_id>", "epoch-<epoch_number>.pt")
MODEL_WEIGHTS_FILE_PATH = path_join(models_dir, "torch---2024-04-23-08-25-15", "epoch-9.pt")

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = CNN()
    model = model.to(device)
    model.load_state_dict(torch.load(MODEL_WEIGHTS_FILE_PATH, map_location=device))
    model.eval()
We'll first test on data from our test data loader and then on some custom images. This part is similar to the ModelTrainer.test function defined in train.py.
print("Testing on MNIST Test Set")
num_correct = 0
with torch.no_grad():
for data, target in data_loaders["test"]:
data = data.to(device)
target = target.to(device)
output = model(data)
prediction = output.argmax(dim=1, keepdim=True)
num_correct += prediction.eq(target.view_as(prediction)).sum().item()
accuracy = num_correct / len(data_loaders['test'].dataset)
print(f" Accuracy: {accuracy}")
print()
Inside our data directory, I have created another folder called test_images containing some 28x28 PNGs with a black background and digits drawn in white. I also have a file called test_images_labels.json that contains a label for each image. Here is the labels file.
{
    "1.png": 1,
    "2.png": 7,
    "3.png": 2,
    "4.png": 9,
    "5.png": 8,
    "6.png": 5,
    "7.png": 1,
    "8.png": 7,
    "9.png": 1,
    "10.png": 7,
    "11.png": 7,
    "12.png": 0,
    "13.png": 5,
    "14.png": 3,
    "15.png": 2,
    "16.png": 1,
    "17.png": 0,
    "18.png": 8,
    "19.png": 7,
    "20.png": 4
}
We want to preprocess our test images in exactly the same way we preprocessed our training data, so we define a matching set of transforms.
    preprocessing_transforms = transforms.Compose([
        transforms.ToTensor(),
    ])
We can now load the images, get predictions on them, and calculate the accuracy.
    test_images_dir = path_join(data_dir, "test_images")
    test_images_labels_file_path = path_join(data_dir, "test_images_labels.json")

    with open(test_images_labels_file_path, 'r') as test_images_labels_file:
        test_images_labels = json_load(test_images_labels_file)

    test_images = []
    test_labels = []
    for image_name in listdir(test_images_dir):
        image_path = path_join(test_images_dir, image_name)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        test_images.append(image)
        test_labels.append(test_images_labels[image_name])

    # Preprocess
    for i in range(len(test_images)):
        test_images[i] = preprocessing_transforms(test_images[i])
        test_images[i] = test_images[i].unsqueeze(0)

    # Predict
    num_correct = 0
    for preprocessed_image, label in zip(test_images, test_labels):
        preprocessed_image = preprocessed_image.to(device)  # keep the input on the same device as the model

        output = model(preprocessed_image)

        prediction = output.argmax(dim=1, keepdim=True)
        prediction = int(prediction)

        num_correct += prediction == label

    accuracy = num_correct / len(test_images)
    print(f"Correct: {num_correct} / {len(test_images)}, Accuracy: {accuracy}")
Here's the entire src/test.py file.
from json import load as json_load
from os import listdir
from os.path import join as path_join

import cv2
import torch
from torchvision import transforms

from .config import data_dir, models_dir
from .dataset import data_loaders
from .model import CNN

# MODEL_WEIGHTS_FILE_PATH = path_join(models_dir, "<training_id>", "epoch-<epoch_number>.pt")
MODEL_WEIGHTS_FILE_PATH = path_join(models_dir, "torch---2024-04-23-08-25-15", "epoch-9.pt")

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = CNN()
    model = model.to(device)
    model.load_state_dict(torch.load(MODEL_WEIGHTS_FILE_PATH, map_location=device))
    model.eval()

    print("Testing on MNIST Test Set")

    num_correct = 0
    with torch.no_grad():
        for data, target in data_loaders["test"]:
            data = data.to(device)
            target = target.to(device)

            output = model(data)

            prediction = output.argmax(dim=1, keepdim=True)
            num_correct += prediction.eq(target.view_as(prediction)).sum().item()

    accuracy = num_correct / len(data_loaders['test'].dataset)

    print(f" Accuracy: {accuracy}")
    print()

    # Test on Custom Images
    preprocessing_transforms = transforms.Compose([
        transforms.ToTensor(),
    ])

    # Load Images
    test_images_dir = path_join(data_dir, "test_images")
    test_images_labels_file_path = path_join(data_dir, "test_images_labels.json")

    with open(test_images_labels_file_path, 'r') as test_images_labels_file:
        test_images_labels = json_load(test_images_labels_file)

    test_images = []
    test_labels = []
    for image_name in listdir(test_images_dir):
        image_path = path_join(test_images_dir, image_name)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        test_images.append(image)
        test_labels.append(test_images_labels[image_name])

    # Preprocess
    for i in range(len(test_images)):
        test_images[i] = preprocessing_transforms(test_images[i])
        test_images[i] = test_images[i].unsqueeze(0)

    # Predict
    num_correct = 0
    for preprocessed_image, label in zip(test_images, test_labels):
        preprocessed_image = preprocessed_image.to(device)  # keep the input on the same device as the model

        output = model(preprocessed_image)

        prediction = output.argmax(dim=1, keepdim=True)
        prediction = int(prediction)

        num_correct += prediction == label

    accuracy = num_correct / len(test_images)
    print(f"Correct: {num_correct} / {len(test_images)}, Accuracy: {accuracy}")
Inside the src folder, create another folder called converters and in it create a file called converter_onnx.py.
from os.path import dirname, join as path_join

import torch

from ..config import models_dir
from ..dataset import data_loaders
from ..model import CNN

# MODEL_WEIGHTS_FILE_PATH = path_join(models_dir, "<training_id>", "epoch-<epoch_number>.pt")
MODEL_WEIGHTS_FILE_PATH = path_join(models_dir, "torch---2024-04-23-08-25-15", "epoch-9.pt")
ONNX_MODEL_FILE_PATH = path_join(dirname(MODEL_WEIGHTS_FILE_PATH), "model.onnx")

if __name__ == "__main__":
    device = torch.device("cpu")

    torch_model = CNN()
    torch_model = torch_model.to(device)
    torch_model.load_state_dict(torch.load(MODEL_WEIGHTS_FILE_PATH, map_location=device))
    torch_model.eval()

    # Grab a single sample from the test loader to use as the example input for the export
    for data, target in data_loaders["test"]:
        data = data.to(device)
        target = target.to(device)
        break

    sample_input = data[0].unsqueeze(0)

    onnx_model = torch.onnx.dynamo_export(
        torch_model,
        sample_input
    )
    onnx_model.save(ONNX_MODEL_FILE_PATH)
We load our model just as we do in test.py, convert it to ONNX using torch.onnx.dynamo_export, and save it in the same directory as the PyTorch weights.
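To sanity-check the exported file from Python, we can load it with onnxruntime (assuming the onnxruntime package is installed; the model path below is a placeholder):
import numpy as np
import onnxruntime as ort

# Placeholder path: point this at the model.onnx produced by the converter
session = ort.InferenceSession("models/torch---<training_id>/model.onnx")
input_name = session.get_inputs()[0].name  # dynamo_export chooses the input name for us

dummy_input = np.random.rand(1, 1, 28, 28).astype(np.float32)
outputs = session.run(None, {input_name: dummy_input})
print(outputs[0].shape)  # expected: (1, 10)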
In this small Post, we have trained a Convolutional Neural Network on the MNIST dataset, tested it on custom images, and exported the model to ONNX. It's nothing fancy, but we'll use this model in some of our upcoming tutorials, where we build a website that lets the user draw digits and get the model's prediction on the image.
See you next time :)