Irish Sign Language Detection Using Transfer Learning¶

Anna Łukawska, Benny Fülöp, and Danée Knevel

This Python notebook assesses the use of transfer learning for recognising sign language characters using the Irish Sign Language dataset.

The data is downloaded and unzipped from the following GitHub repo: https://github.com/marlondcu/ISL/tree/master/Frames. The modelling was greatly informed by the PyTorch transfer learning tutorial: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

Loading Required Packages¶

In [1]:
import os
import re
import cv2
import pandas as pd
import numpy as np
import shutil
import random
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from tabulate import tabulate
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn import metrics
import uuid
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from torchvision.transforms import v2
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from torchinfo import summary
from tempfile import TemporaryDirectory
In [ ]:
#https://github.com/marlondcu/ISL/tree/d1d50bb65540b904e3e0a6ffe0997872c4e9e645/Frames
#https://github.com/marlondcu/ISL/tree/d1d50bb65540b904e3e0a6ffe0997872c4e9e645/Videos
In [ ]:
#git.Repo.clone_from(repo_url, destination_path)
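If the data has not been downloaded yet, a minimal sketch of cloning the repo with GitPython (hinted at by the commented clone_from call above) could look like the cell below; the repo_url and destination_path values are assumptions, and cloning also pulls down the Videos folder.

In [ ]:
# Hedged sketch, not part of the original run: clone the ISL repo so that ISL/Frames exists locally.
# Assumes the GitPython package is installed (pip install GitPython).
import git

repo_url = "https://github.com/marlondcu/ISL.git"  # assumed clone URL for the repo linked above
destination_path = "ISL"                            # the cells below expect the frames under ISL/Frames

if not os.path.isdir(destination_path):
    git.Repo.clone_from(repo_url, destination_path)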

Train Test Split¶

In [3]:
# Train and Test Split 
# Set the path to your Frames folder
frames_path = "ISL/Frames"

# Set the output path for train and test sets
train_path = os.path.join("ISL/SplitDataset", "train")
test_path = os.path.join("ISL/SplitDataset", "test")
os.makedirs(train_path, exist_ok=True)
os.makedirs(test_path, exist_ok=True)

# Set the train-set ratio to 80%; a single random split is quicker than cross-validation
train_ratio = 0.8

# Iterate through each person folder
for person_folder in os.listdir(frames_path):
    person_path = os.path.join(frames_path, person_folder)

    # Skip if it's not a directory
    if not os.path.isdir(person_path):
        continue

    # Create train and test folders for the person
    train_person_path = os.path.join(train_path, person_folder)
    test_person_path = os.path.join(test_path, person_folder)
    os.makedirs(train_person_path, exist_ok=True)
    os.makedirs(test_person_path, exist_ok=True)

    # Get the list of image files for the person
    image_files = [f for f in os.listdir(person_path) if f.endswith(".jpg")]

    # Randomly assign each image to the train or test set
    for image_file in image_files:
        if random.uniform(0, 1) < train_ratio:
            src_path = os.path.join(person_path, image_file)
            dst_path = os.path.join(train_person_path, image_file)
            shutil.copy(src_path, dst_path)
        else:
            src_path = os.path.join(person_path, image_file)
            dst_path = os.path.join(test_person_path, image_file)
            shutil.copy(src_path, dst_path)
    # tabulate the splits we ended up with
    print(f"Person {person_folder} - Train set: {len(os.listdir(train_person_path))}, Test set: {len(os.listdir(test_person_path))}")

    # The per-person subfolders are kept so the person information is preserved, just in case it is needed later
Person Person1 - Train set: 7090, Test set: 1774
Person Person2 - Train set: 8006, Test set: 1965
Person Person3 - Train set: 7495, Test set: 1825
Person Person4 - Train set: 7681, Test set: 1870
Person Person5 - Train set: 9249, Test set: 2229
Person Person6 - Train set: 7141, Test set: 1789
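Because the assignment above draws random.uniform without a fixed seed, the per-person counts will differ between runs. A seeded variant (a sketch only; not the code that produced the numbers above) could sample a fixed 80% of each person's images instead:

In [ ]:
# Hypothetical reproducible variant of the split above (not what produced the printed counts)
rng = random.Random(42)  # fixed seed -> repeatable assignment
for person_folder in os.listdir(frames_path):
    person_path = os.path.join(frames_path, person_folder)
    if not os.path.isdir(person_path):
        continue
    image_files = sorted(f for f in os.listdir(person_path) if f.endswith(".jpg"))
    train_files = set(rng.sample(image_files, int(len(image_files) * train_ratio)))
    # images in train_files would be copied to the train folder and the rest to the test folder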

Data Wrangling¶

This portion reorganises the files into class subfolders so the data can be ingested by PyTorch's ImageFolder.
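As a quick illustration of the label extraction used below (the file name here is only an assumed example of the naming scheme), the regex pulls out the single character between dashes:

In [ ]:
# Assumed example file name, purely to illustrate the pattern applied in the next cell
example_name = "Person1-A-000123.jpg"
print(re.search(r'-(\w)-', example_name).group(1))  # prints: A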

In [14]:
# helper function to pop subfolders
def pop_folder(folder_path): 
    contents = os.listdir(folder_path)
    
    for item in contents:
        item_path = os.path.join(folder_path, item)
        new_path = os.path.join(os.path.dirname(folder_path), item)
        
        shutil.move(item_path, new_path)
    shutil.rmtree(folder_path)
    
# regex to extract labels from file names    
pattern = r'-(\w)-'

# could loop over the train and test paths here to save a few lines of code

# Unwrapping the Person subfolders to get all the images directly in the train and test folders
for subfolder in os.listdir(train_path):
    pop_folder(os.path.join(train_path, subfolder))

for subfolder in os.listdir(test_path):
    pop_folder(os.path.join(test_path, subfolder))

# Extract the single character between dashes from each file name so we have labels for the
# class folders and a way to sort the images (this must run after the Person folders are unwrapped)
abcs = set(re.search(pattern, file_name).group(1) for file_name in os.listdir(train_path))


# Create subfolders in train and test with the label names
for value in abcs:
    folder_path = os.path.join(train_path, value)
    os.makedirs(folder_path, exist_ok=True)
    
for value in abcs:
    folder_path = os.path.join(test_path, value)
    os.makedirs(folder_path, exist_ok=True)
    

file_names = os.listdir(train_path)

# Move files to their corresponding folders
for file_name in file_names:
    match = re.search(pattern, file_name)
    if match:
        value = match.group(1)
        source_path = os.path.join(train_path, file_name)
        destination_path = os.path.join(train_path, value, file_name)
        
        # Ensure the destination folder exists before moving
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        
        shutil.move(source_path, destination_path)
        
file_names = os.listdir(test_path)

for file_name in file_names:
    match = re.search(pattern, file_name)
    if match:
        value = match.group(1)
        source_path = os.path.join(test_path, file_name)
        destination_path = os.path.join(test_path, value, file_name)

        # Ensure the destination folder exists before moving
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)

        shutil.move(source_path, destination_path)
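A quick sanity check (a sketch; it was not part of the original run) is to count the images per class folder after the re-organisation:

In [ ]:
# Optional sanity check: number of images per class folder after the move
for label in sorted(os.listdir(train_path)):
    print(label, len(os.listdir(os.path.join(train_path, label))))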
In [3]:
# cell to run in case the data wrangling has already been done; it just re-asserts the train and test paths
train_path = os.path.join("ISL", "SplitDataset", "train")
test_path = os.path.join( "ISL", "SplitDataset", "test")

Setting up Data Loading and Transforms¶

In [53]:
data_transforms = {
    'train': v2.Compose([
        #v2.RandomResizedCrop(224), # To deal with missing parts of the hand in the detection area, this caused errors later on
        v2.RandomHorizontalFlip(), # To deal with left and right hands; the dataset only contains right hands
        v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.)), # To address image quality issues
        v2.RandomInvert(), # The images are greyscale; this should help the network deal with white backgrounds and differentiate the hands
        v2.RandomPerspective(distortion_scale=0.4, p=0.5), # To address different angles of the images
        transforms.ToTensor(), # convert to tensor object
        v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # using the ImageNet normalisation since we use ImageNet weights
    ]),
    'test': v2.Compose([
        v2.Resize(256),
        v2.CenterCrop(224),
        v2.RandomHorizontalFlip(p=0.5), # flipping is kept at test time too, since a sign can reasonably be made with either hand in real life
        transforms.ToTensor(),
        v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir =  os.path.join("ISL", "SplitDataset") # directory of the data
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']} # dict comprehension to set up the train and test datasets

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=30,
                                             shuffle=True, num_workers=24)
              for x in ['train', 'test']} # creating the dataloaders and specifying batch size and number of cores ##### adjust if needed

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']} # get dataset sizes
class_names = image_datasets['train'].classes # get class labels

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # we trained on gpu
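The batch size and the 24 dataloader workers above were hard-coded for the machine we trained on. A more portable sketch (an assumption, not what was actually used) derives the worker count from the available cores before building the dataloaders:

In [ ]:
# Hedged sketch: derive the DataLoader worker count from the machine instead of hard-coding 24
num_workers = max(1, (os.cpu_count() or 1) - 2)  # leave a couple of cores free for the main process
print(f"Using {num_workers} DataLoader workers")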
In [56]:
# Visualising a batch of the data to see what we will train on

def imshow(inp, title=None):
    """Display image for Tensor."""
    #plt.figure(figsize=(20,10))
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data 
inputs, classes = next(iter(dataloaders['train'])) 

# Make a grid from batch
out = torchvision.utils.make_grid(inputs) 

imshow(out, title=[class_names[x] for x in classes])
[Output image: grid of one augmented training batch with its class labels]

Training¶

In [5]:
# defining the training function
# adapted from the PyTorch transfer learning tutorial

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and a test (validation) phase
            for phase in ['train', 'test']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    #print(labels)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # deep copy the model
                if phase == 'test' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best test Acc: {best_acc:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return model
ResNet18 Architecture¶
In [8]:
model_res_18 = models.resnet18(weights='IMAGENET1K_V1') # pulling the resnet18 architecture with the weights from imagenet
num_ftrs = model_res_18.fc.in_features # getting the input features dimensions

model_res_18.fc = nn.Linear(num_ftrs,len(class_names)) #specifying the classification layer

model_res_18 = model_res_18.to(device) # sending the model to the gpu

criterion = nn.CrossEntropyLoss() # training criterion is cross entropy loss

# Observe that all parameters are being optimized 
optimizer_ft = optim.SGD(model_res_18.parameters(), lr=0.001, momentum=0.9) 

# Decay LR by a factor of 0.1 every 3 epochs to adjust the learning rate
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1, verbose=True)
In [9]:
model_res_18 = train_model(model_res_18, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
train Loss: 1.7720 Acc: 0.4592
test Loss: 0.3620 Acc: 0.8855

Epoch 1/9
----------
train Loss: 1.2564 Acc: 0.6153
test Loss: 0.1804 Acc: 0.9364

Epoch 2/9
----------
train Loss: 1.1345 Acc: 0.6526
test Loss: 0.1602 Acc: 0.9466

Epoch 3/9
----------
train Loss: 1.0538 Acc: 0.6771
test Loss: 0.0867 Acc: 0.9700

Epoch 4/9
----------
train Loss: 1.0093 Acc: 0.6901
test Loss: 0.1193 Acc: 0.9579

Epoch 5/9
----------
train Loss: 0.9787 Acc: 0.7006
test Loss: 0.0805 Acc: 0.9743

Epoch 6/9
----------
train Loss: 0.9616 Acc: 0.7042
test Loss: 0.0597 Acc: 0.9804

Epoch 7/9
----------
train Loss: 0.8875 Acc: 0.7307
test Loss: 0.0457 Acc: 0.9839

Epoch 8/9
----------
train Loss: 0.8696 Acc: 0.7348
test Loss: 0.0369 Acc: 0.9884

Epoch 9/9
----------
train Loss: 0.8524 Acc: 0.7396
test Loss: 0.0315 Acc: 0.9908

Training complete in 21m 50s
Best test Acc: 0.990831
In [12]:
torch.save(model_res_18.state_dict(), r"C:\Users\azabe\Desktop\Masters\AppliedAI\resnet18_10_aug.pt") # saving the model

ResNet18 Architecture Re-load model¶

In [3]:
# Instantiate the model
model_res_18 = models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = model_res_18.fc.in_features
# Modify the final fully connected layer to match the number of classes in the loaded state_dict
num_classes = 26
model_res_18.fc = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Load the state dictionary from the file
state_dict = torch.load(r"C:\Users\azabe\Desktop\Masters\AppliedAI\resnet18_10_aug.pt", map_location='cpu')

# Load the state dictionary into the model
model_res_18.load_state_dict(state_dict)
Out[3]:
<All keys matched successfully>
ResNet 50 Architecture¶
In [6]:
model_res_50 = models.resnet50(weights='IMAGENET1K_V1')
num_ftrs = model_res_50.fc.in_features

model_res_50.fc = nn.Linear(num_ftrs,len(class_names))

model_res_50 = model_res_50.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_res_50.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 3 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1, verbose = True)
Adjusting learning rate of group 0 to 1.0000e-03.
In [7]:
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
model_res_50 = train_model(model_res_50, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 1.6998 Acc: 0.4915
test Loss: 0.2352 Acc: 0.9241

Epoch 1/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 1.1395 Acc: 0.6530
test Loss: 0.1282 Acc: 0.9609

Epoch 2/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 1.0313 Acc: 0.6848
test Loss: 0.0930 Acc: 0.9687

Epoch 3/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 0.9205 Acc: 0.7204
test Loss: 0.0523 Acc: 0.9838

Epoch 4/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 0.8864 Acc: 0.7289
test Loss: 0.0516 Acc: 0.9831

Epoch 5/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 0.8898 Acc: 0.7296
test Loss: 0.0511 Acc: 0.9836

Epoch 6/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 0.8699 Acc: 0.7352
test Loss: 0.0445 Acc: 0.9852

Epoch 7/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 0.8773 Acc: 0.7337
test Loss: 0.0473 Acc: 0.9845

Epoch 8/9
----------
Adjusting learning rate of group 0 to 1.0000e-06.
train Loss: 0.8691 Acc: 0.7345
test Loss: 0.0463 Acc: 0.9849

Epoch 9/9
----------
Adjusting learning rate of group 0 to 1.0000e-06.
train Loss: 0.8663 Acc: 0.7367
test Loss: 0.0442 Acc: 0.9847

Training complete in 54m 55s
Best test Acc: 0.985155
In [8]:
torch.save(model_res_50.state_dict(), r"C:\Users\azabe\Desktop\Masters\AppliedAI\resnet50_10_aug.pt")

ResNet50 Architecture Re-Load¶

In [4]:
# Instantiate the model
model_res_50 = models.resnet50(weights='IMAGENET1K_V1')
num_ftrs = model_res_50.fc.in_features
# Modify the final fully connected layer to match the number of classes in the loaded state_dict
num_classes = 26
model_res_50.fc = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Load the state dictionary from the file
state_dict = torch.load(r"C:\Users\azabe\Desktop\Masters\AppliedAI\resnet50_10_aug.pt", map_location='cpu')

# Load the state dictionary into the model
model_res_50.load_state_dict(state_dict)
Out[4]:
<All keys matched successfully>
ResNet 101 Architecture¶
In [52]:
model_res_101 = models.resnet101(weights='IMAGENET1K_V1')
num_ftrs = model_res_101.fc.in_features

model_res_101.fc = nn.Linear(num_ftrs,len(class_names))

model_res_101 = model_res_101.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_res_101.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 3 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1, verbose = True)
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to C:\Users\azabe/.cache\torch\hub\checkpoints\resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:14<00:00, 12.5MB/s] 
Adjusting learning rate of group 0 to 1.0000e-03.
In [54]:
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
model_res_101 = train_model(model_res_101, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 1.1167 Acc: 0.6588
test Loss: 0.1308 Acc: 0.9524

Epoch 1/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 1.0094 Acc: 0.6907
test Loss: 0.1188 Acc: 0.9564

Epoch 2/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 0.8957 Acc: 0.7267
test Loss: 0.0560 Acc: 0.9798

Epoch 3/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 0.8792 Acc: 0.7305
test Loss: 0.0456 Acc: 0.9845

Epoch 4/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 0.8398 Acc: 0.7443
test Loss: 0.0435 Acc: 0.9857

Epoch 5/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 0.8401 Acc: 0.7436
test Loss: 0.0416 Acc: 0.9862

Epoch 6/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 0.8358 Acc: 0.7453
test Loss: 0.0420 Acc: 0.9865

Epoch 7/9
----------
Adjusting learning rate of group 0 to 1.0000e-06.
train Loss: 0.8268 Acc: 0.7483
test Loss: 0.0404 Acc: 0.9869

Epoch 8/9
----------
Adjusting learning rate of group 0 to 1.0000e-06.
train Loss: 0.8386 Acc: 0.7444
test Loss: 0.0369 Acc: 0.9888

Epoch 9/9
----------
Adjusting learning rate of group 0 to 1.0000e-06.
train Loss: 0.8444 Acc: 0.7420
test Loss: 0.0361 Acc: 0.9889

Training complete in 75m 28s
Best test Acc: 0.988910
In [55]:
torch.save(model_res_101.state_dict(), r"C:\Users\azabe\Desktop\Masters\AppliedAI\resnet101_10_aug.pt")

ResNet 101 Architecture Re-Load¶

In [5]:
# Instantiate the model
model_res_101 = models.resnet101(weights='IMAGENET1K_V1')
num_ftrs = model_res_101.fc.in_features
# Modify the final fully connected layer to match the number of classes in the loaded state_dict
num_classes = 26
model_res_101.fc = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Load the state dictionary from the file
state_dict = torch.load(r"C:\Users\azabe\Desktop\Masters\AppliedAI\resnet101_10_aug.pt", map_location='cpu')

# Load the state dictionary into the model
model_res_101.load_state_dict(state_dict)
Out[5]:
<All keys matched successfully>
ResNet18 as feature extractor¶
In [10]:
model_res_18_l = models.resnet18(weights='IMAGENET1K_V1')
for param in model_res_18_l.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_res_18_l.fc.in_features
model_res_18_l.fc = nn.Linear(num_ftrs, len(class_names))

model_res_18_l = model_res_18_l.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_fe = optim.SGD(model_res_18_l.fc.parameters(), lr=0.01, momentum=0.8)

# Decay LR by a factor of 0.1 every 3 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_fe, step_size=3, gamma=0.1, verbose = True)
Adjusting learning rate of group 0 to 1.0000e-02.
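As a quick check (a sketch, not part of the original run), we can confirm that freezing the backbone leaves only the new fc layer trainable:

In [ ]:
# Count trainable vs total parameters; with the backbone frozen only the fc layer should remain trainable
trainable = sum(p.numel() for p in model_res_18_l.parameters() if p.requires_grad)
total = sum(p.numel() for p in model_res_18_l.parameters())
print(f"Trainable parameters: {trainable:,} of {total:,}")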
In [11]:
model_res_18_l= train_model(model_res_18_l, criterion, optimizer_fe, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
Adjusting learning rate of group 0 to 1.0000e-02.
train Loss: 2.9155 Acc: 0.1711
test Loss: 2.2564 Acc: 0.2979

Epoch 1/9
----------
Adjusting learning rate of group 0 to 1.0000e-02.
train Loss: 2.7058 Acc: 0.2291
test Loss: 2.0205 Acc: 0.3588

Epoch 2/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 2.6646 Acc: 0.2402
test Loss: 1.9677 Acc: 0.3823

Epoch 3/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 2.4678 Acc: 0.2827
test Loss: 1.7902 Acc: 0.4358

Epoch 4/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 2.4535 Acc: 0.2903
test Loss: 1.8038 Acc: 0.4319

Epoch 5/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 2.4489 Acc: 0.2908
test Loss: 1.7621 Acc: 0.4538

Epoch 6/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 2.4237 Acc: 0.2993
test Loss: 1.7672 Acc: 0.4505

Epoch 7/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 2.4301 Acc: 0.2987
test Loss: 1.7725 Acc: 0.4520

Epoch 8/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 2.4248 Acc: 0.2981
test Loss: 1.7778 Acc: 0.4539

Epoch 9/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 2.4274 Acc: 0.2968
test Loss: 1.7802 Acc: 0.4582

Training complete in 20m 1s
Best test Acc: 0.458173
In [12]:
torch.save(model_res_18_l.state_dict(), r"C:\Users\azabe\Desktop\Masters\AppliedAI\model_res_18_l.pt")

ResNet18 as feature extractor Re-Load¶

In [7]:
# Instantiate the model
model_res_18_l = models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = model_res_18_l.fc.in_features
# Modify the final fully connected layer to match the number of classes in the loaded state_dict
num_classes = 26
model_res_18_l.fc = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Load the state dictionary from the file
state_dict = torch.load(r"C:\Users\azabe\Desktop\Masters\AppliedAI\model_res_18_l.pt", map_location='cpu')

# Load the state dictionary into the model
model_res_18_l.load_state_dict(state_dict)
Out[7]:
<All keys matched successfully>
ResNet50 as feature extractor¶
In [19]:
model_res_50_l = models.resnet50(weights='IMAGENET1K_V1')
for param in model_res_50_l.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_res_50_l.fc.in_features
model_res_50_l.fc = nn.Linear(num_ftrs, len(class_names))

model_res_50_l = model_res_50_l.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_fe = optim.SGD(model_res_50_l.fc.parameters(), lr=0.01, momentum=0.8)

# Decay LR by a factor of 0.1 every 3 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_fe, step_size=3, gamma=0.1, verbose = True)
In [21]:
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
model_res_50_l= train_model(model_res_50_l, criterion, optimizer_fe, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
train Loss: 2.8312 Acc: 0.2005
test Loss: 1.9983 Acc: 0.3914

Epoch 1/9
----------
train Loss: 2.7340 Acc: 0.2262
test Loss: 1.9950 Acc: 0.3709

Epoch 2/9
----------
train Loss: 2.7144 Acc: 0.2340
test Loss: 2.1773 Acc: 0.3353

Epoch 3/9
----------
train Loss: 2.6883 Acc: 0.2400
test Loss: 1.9026 Acc: 0.4053

Epoch 4/9
----------
train Loss: 2.6600 Acc: 0.2477
test Loss: 1.8347 Acc: 0.4276

Epoch 5/9
----------
train Loss: 2.6329 Acc: 0.2531
test Loss: 1.8490 Acc: 0.4200

Epoch 6/9
----------
train Loss: 2.6005 Acc: 0.2635
test Loss: 1.7657 Acc: 0.4439

Epoch 7/9
----------
train Loss: 2.4311 Acc: 0.3002
test Loss: 1.6734 Acc: 0.4849

Epoch 8/9
----------
train Loss: 2.4284 Acc: 0.3006
test Loss: 1.6856 Acc: 0.4878

Epoch 9/9
----------
train Loss: 2.4173 Acc: 0.3029
test Loss: 1.6717 Acc: 0.4872

Training complete in 32m 18s
Best test Acc: 0.487775
In [22]:
torch.save(model_res_50_l.state_dict(), r"C:\Users\azabe\Desktop\Masters\AppliedAI\model_res_50_l.pt")

ResNet50 as feature extractor Re-Load¶

In [6]:
# Instantiate the model
model_res_50_l = models.resnet50(weights='IMAGENET1K_V1')
num_ftrs = model_res_50_l.fc.in_features
# Modify the final fully connected layer to match the number of classes in the loaded state_dict
num_classes = 26
model_res_50_l.fc = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Load the state dictionary from the file
state_dict = torch.load(r"C:\Users\azabe\Desktop\Masters\AppliedAI\model_res_50_l.pt", map_location='cpu')

# Load the state dictionary into the model
model_res_50_l.load_state_dict(state_dict)
Out[6]:
<All keys matched successfully>
ResNet101 as feature extractor¶
In [59]:
model_res_101_l = models.resnet101(weights='IMAGENET1K_V1')
for param in model_res_101_l.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_res_101_l.fc.in_features
model_res_101_l.fc = nn.Linear(num_ftrs, len(class_names))

model_res_101_l = model_res_101_l.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_fe = optim.SGD(model_res_101_l.fc.parameters(), lr=0.01, momentum=0.8)

# Decay LR by a factor of 0.1 every 3 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_fe, step_size=3, gamma=0.1, verbose = True)
Adjusting learning rate of group 0 to 1.0000e-02.
In [60]:
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
model_res_101_l= train_model(model_res_101_l, criterion, optimizer_fe, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
Adjusting learning rate of group 0 to 1.0000e-02.
train Loss: 2.9061 Acc: 0.1776
test Loss: 2.1546 Acc: 0.3148

Epoch 1/9
----------
Adjusting learning rate of group 0 to 1.0000e-02.
train Loss: 2.7187 Acc: 0.2298
test Loss: 2.0695 Acc: 0.3702

Epoch 2/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 2.6669 Acc: 0.2461
test Loss: 1.9569 Acc: 0.3590

Epoch 3/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 2.4339 Acc: 0.2971
test Loss: 1.7898 Acc: 0.4335

Epoch 4/9
----------
Adjusting learning rate of group 0 to 1.0000e-03.
train Loss: 2.4324 Acc: 0.2963
test Loss: 1.7604 Acc: 0.4509

Epoch 5/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 2.4184 Acc: 0.3022
test Loss: 1.7804 Acc: 0.4397

Epoch 6/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 2.3988 Acc: 0.3088
test Loss: 1.7612 Acc: 0.4497

Epoch 7/9
----------
Adjusting learning rate of group 0 to 1.0000e-04.
train Loss: 2.3974 Acc: 0.3098
test Loss: 1.7708 Acc: 0.4475

Epoch 8/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 2.3933 Acc: 0.3092
test Loss: 1.7422 Acc: 0.4531

Epoch 9/9
----------
Adjusting learning rate of group 0 to 1.0000e-05.
train Loss: 2.3929 Acc: 0.3101
test Loss: 1.7735 Acc: 0.4371

Training complete in 39m 6s
Best test Acc: 0.453109
In [61]:
torch.save(model_res_101_l.state_dict(), r"C:\Users\azabe\Desktop\Masters\AppliedAI\model_res_101_l.pt")

ResNet101 as feature extractor Re-Load¶

In [8]:
# Instantiate the model
model_res_101_l = models.resnet101(weights='IMAGENET1K_V1')
num_ftrs = model_res_101_l.fc.in_features
# Modify the final fully connected layer to match the number of classes in the loaded state_dict
num_classes = 26
model_res_101_l.fc = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

# Load the state dictionary from the file
state_dict = torch.load(r"C:\Users\azabe\Desktop\Masters\AppliedAI\model_res_101_l.pt", map_location='cpu')

# Load the state dictionary into the model
model_res_101_l.load_state_dict(state_dict)
Out[8]:
<All keys matched successfully>

Model evaluation¶

Tables of Accuracy¶
In [16]:
model_names = ['model_res_101_l', 'model_res_50_l', 'model_res_18_l', 'model_res_101', 'model_res_50', 'model_res_18']

# Initialize a list to store accuracy scores
accuracy_scores = []

model_res_101_l.to(device)
model_res_50_l.to(device)
model_res_18_l.to(device)
model_res_101.to(device)
model_res_50.to(device)
model_res_18.to(device)

# Function to evaluate a model and return accuracy
def evaluate_model(model, dataloader):
    model.eval()
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            true_labels.extend(labels.cpu().numpy())
            predicted_labels.extend(preds.cpu().numpy())

    accuracy = metrics.accuracy_score(true_labels, predicted_labels)
    return accuracy

# Evaluate each model and store accuracy scores
for model in [model_res_101_l, model_res_50_l, model_res_18_l, model_res_101, model_res_50, model_res_18]:
    accuracy = evaluate_model(model, dataloaders['test'])
    accuracy_scores.append(accuracy)

# Create a table
table_data = list(zip(model_names, accuracy_scores))
table_headers = ["Model Name", "Accuracy"]
table = tabulate(table_data, headers=table_headers, tablefmt="grid")

# Print the table
print(table)
+-----------------+------------+
| Model Name      |   Accuracy |
+=================+============+
| model_res_101_l |   0.455117 |
+-----------------+------------+
| model_res_50_l  |   0.48437  |
+-----------------+------------+
| model_res_18_l  |   0.454942 |
+-----------------+------------+
| model_res_101   |   0.988736 |
+-----------------+------------+
| model_res_50    |   0.985854 |
+-----------------+------------+
| model_res_18    |   0.990482 |
+-----------------+------------+

Code to create a validation set easily

In [ ]:
# this is the code we used to create our own validation set
path = os.path.join("ISL", "Validation") # where we write the results

# parameters of the image capture
abcs = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
num_img = 11 # number of images to capture; 11 is used because on my system the first frame is consumed initialising the camera

for label in abcs:
    os.makedirs(os.path.join(path, label), exist_ok=True)
    cap = cv2.VideoCapture(0)
    print(f"Recording images for {label}") # show what we will record
    time.sleep(5) # time between recordings
    for imgnum in range(num_img):
        ret, frame = cap.read()
        imagename = os.path.join(path, label, "{}.jpg".format(uuid.uuid1())) # generate unique file names
        cv2.imwrite(imagename, frame)
        cv2.imshow("frame", frame)
        time.sleep(2) # give time to move between captures
        
        if cv2.waitKey(1) & 0xFF == ord('q'): # quit criterion to exit the program
            break
    cap.release()
cv2.destroyAllWindows()

Open the validation set, transform it the same way we transformed the test set to keep the comparison fair, and create a data loader for prediction

In [57]:
eval_path = os.path.join("ISL", "Our pictures (1)-20240117T150704Z-001","Our pictures (1)")

# Define the evaluation data transform
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create an evaluation dataset
eval_dataset = datasets.ImageFolder(eval_path, eval_transform)

# Create an evaluation dataloader
eval_dataloader = torch.utils.data.DataLoader(eval_dataset, batch_size=30, shuffle=True, num_workers=24)
In [44]:
# Evaluate each model on the evaluation set
eval_accuracy_scores = []

for model in [model_res_101_l, model_res_50_l, model_res_18_l, model_res_101, model_res_50, model_res_18]:
    accuracy = evaluate_model(model, eval_dataloader)
    eval_accuracy_scores.append(accuracy)

# Create a table for evaluation results
eval_table_data = list(zip(model_names, eval_accuracy_scores))
eval_table_headers = ["Model Name", "Accuracy"]
eval_table = tabulate(eval_table_data, headers=eval_table_headers, tablefmt="grid")

# Print the evaluation table
print("Evaluation Results:")
print(eval_table)
Evaluation Results:
+-----------------+------------+
| Model Name      |   Accuracy |
+=================+============+
| model_res_101_l |  0.0373134 |
+-----------------+------------+
| model_res_50_l  |  0.0410448 |
+-----------------+------------+
| model_res_18_l  |  0.0447761 |
+-----------------+------------+
| model_res_101   |  0.597015  |
+-----------------+------------+
| model_res_50    |  0.61194   |
+-----------------+------------+
| model_res_18    |  0.447761  |
+-----------------+------------+

Visualising model results¶

In [40]:
def visualize_model(model, num_images=6, test_data = dataloaders['test']):  # checking the categorisation via testing on some images
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_data):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            #print(outputs) for checking the output 
            _, preds = torch.max(outputs, 1)
            

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)
In [41]:
visualize_model(model_res_50, 8, dataloaders['test']) # the best model, the fully trained ResNet50, seems to perform very well on the test set
[Output images: eight test-set examples with the model's predicted labels]
In [47]:
visualize_model(model_res_50, 8, eval_dataloader) # The model correctly classifies some signs and struggles with others on data that it hasn't seen
[Output images: eight validation-set examples with the model's predicted labels]
Plotting Confusion Matrices¶
In [23]:
# Set the model to evaluation mode
model_res_101.eval()

# Initialize lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test set and make predictions
with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model_res_101(inputs)
        _, preds = torch.max(outputs, 1)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(preds.cpu().numpy())

# Create a confusion matrix
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Plot the confusion matrix using seaborn
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

# Print classification report
print("Classification Report:")
print(classification_report(true_labels, predicted_labels, target_names=class_names))

# Calculate overall accuracy
accuracy = metrics.accuracy_score(true_labels, predicted_labels)
print(f"Overall Accuracy: {accuracy}")
[Output image: confusion matrix heatmap for model_res_101 on the test set]
Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00       422
           B       1.00      1.00      1.00       449
           C       1.00      1.00      1.00       428
           D       1.00      0.98      0.99       496
           E       0.99      0.98      0.99       497
           F       1.00      0.98      0.99       467
           G       0.98      1.00      0.99       438
           H       0.95      0.98      0.96       483
           I       1.00      0.99      0.99       452
           J       1.00      1.00      1.00       407
           K       1.00      0.97      0.98       471
           L       1.00      1.00      1.00       434
           M       1.00      1.00      1.00       441
           N       1.00      0.96      0.98       388
           O       0.97      1.00      0.99       455
           P       0.97      0.98      0.98       470
           Q       0.95      0.98      0.97       478
           R       1.00      0.98      0.99       445
           S       1.00      1.00      1.00       433
           T       1.00      1.00      1.00       448
           U       0.96      0.99      0.98       415
           V       1.00      0.97      0.98       470
           W       0.97      0.99      0.98       474
           X       1.00      1.00      1.00       295
           Y       1.00      1.00      1.00       435
           Z       1.00      1.00      1.00       361

    accuracy                           0.99     11452
   macro avg       0.99      0.99      0.99     11452
weighted avg       0.99      0.99      0.99     11452

Overall Accuracy: 0.988822913028292
In [24]:
# Set the model to evaluation mode
model_res_101_l.eval()

# Initialize lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test set and make predictions
with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model_res_101_l(inputs)
        _, preds = torch.max(outputs, 1)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(preds.cpu().numpy())

# Create a confusion matrix
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Plot the confusion matrix using seaborn
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

# Print classification report
print("Classification Report:")
print(classification_report(true_labels, predicted_labels, target_names=class_names))

# Calculate overall accuracy
accuracy = metrics.accuracy_score(true_labels, predicted_labels)
print(f"Overall Accuracy: {accuracy}")
[Output image: confusion matrix heatmap for model_res_101_l on the test set]
Classification Report:
              precision    recall  f1-score   support

           A       0.68      0.33      0.45       422
           B       0.50      0.44      0.47       449
           C       0.80      0.74      0.77       428
           D       0.49      0.18      0.26       496
           E       0.56      0.60      0.58       497
           F       0.42      0.24      0.31       467
           G       0.33      0.47      0.39       438
           H       0.45      0.25      0.32       483
           I       0.63      0.30      0.41       452
           J       0.26      0.66      0.37       407
           K       0.61      0.15      0.24       471
           L       0.46      0.86      0.60       434
           M       0.59      0.60      0.59       441
           N       0.51      0.51      0.51       388
           O       0.64      0.61      0.62       455
           P       0.33      0.46      0.38       470
           Q       0.27      0.27      0.27       478
           R       0.54      0.14      0.22       445
           S       0.52      0.26      0.34       433
           T       0.43      0.41      0.42       448
           U       0.30      0.25      0.27       415
           V       0.36      0.50      0.42       470
           W       0.30      0.63      0.41       474
           X       0.83      0.87      0.85       295
           Y       0.50      0.56      0.53       435
           Z       0.59      0.73      0.66       361

    accuracy                           0.45     11452
   macro avg       0.50      0.46      0.45     11452
weighted avg       0.49      0.45      0.44     11452

Overall Accuracy: 0.45153684945860983
In [52]:
# Set the model to evaluation mode
model_res_50.eval()

# Initialize lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through the test set and make predictions
with torch.no_grad():
    for inputs, labels in eval_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model_res_50(inputs)
        _, preds = torch.max(outputs, 1)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(preds.cpu().numpy())

# Create a confusion matrix
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Plot the confusion matrix using seaborn
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

# Print classification report
print("Classification Report:")
print(classification_report(true_labels, predicted_labels, target_names=class_names))

# Calculate overall accuracy
accuracy = metrics.accuracy_score(true_labels, predicted_labels)
print(f"Overall Accuracy: {accuracy}")
[Output image: confusion matrix heatmap for model_res_50 on the validation set]
Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00        10
           B       1.00      1.00      1.00        10
           C       0.56      1.00      0.71        10
           D       0.91      1.00      0.95        10
           E       0.75      0.60      0.67        10
           F       0.25      0.10      0.14        10
           G       0.26      1.00      0.42        10
           H       1.00      1.00      1.00        10
           I       1.00      0.30      0.46        10
           J       0.00      0.00      0.00        10
           K       0.00      0.00      0.00        10
           L       0.91      1.00      0.95        10
           M       0.29      0.60      0.39        10
           N       0.29      0.20      0.24        10
           O       0.82      0.90      0.86        10
           P       0.64      0.90      0.75        10
           Q       1.00      0.40      0.57        10
           R       0.64      0.70      0.67        10
           S       0.25      0.10      0.14        10
           T       0.33      0.40      0.36        10
           U       1.00      0.86      0.92        14
           V       1.00      0.92      0.96        12
           W       1.00      0.50      0.67        12
           X       0.38      0.30      0.33        10
           Y       0.90      0.90      0.90        10
           Z       0.12      0.10      0.11        10

    accuracy                           0.61       268
   macro avg       0.63      0.61      0.58       268
weighted avg       0.64      0.61      0.59       268

Overall Accuracy: 0.6119402985074627
In [26]:
summary(model_res_50) # tabulate the best architecture
Out[26]:
=================================================================
Layer (type:depth-idx)                   Param #
=================================================================
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─Bottleneck: 2-1                   --
│    │    └─Conv2d: 3-1                  4,096
│    │    └─BatchNorm2d: 3-2             128
│    │    └─Conv2d: 3-3                  36,864
│    │    └─BatchNorm2d: 3-4             128
│    │    └─Conv2d: 3-5                  16,384
│    │    └─BatchNorm2d: 3-6             512
│    │    └─ReLU: 3-7                    --
│    │    └─Sequential: 3-8              16,896
│    └─Bottleneck: 2-2                   --
│    │    └─Conv2d: 3-9                  16,384
│    │    └─BatchNorm2d: 3-10            128
│    │    └─Conv2d: 3-11                 36,864
│    │    └─BatchNorm2d: 3-12            128
│    │    └─Conv2d: 3-13                 16,384
│    │    └─BatchNorm2d: 3-14            512
│    │    └─ReLU: 3-15                   --
│    └─Bottleneck: 2-3                   --
│    │    └─Conv2d: 3-16                 16,384
│    │    └─BatchNorm2d: 3-17            128
│    │    └─Conv2d: 3-18                 36,864
│    │    └─BatchNorm2d: 3-19            128
│    │    └─Conv2d: 3-20                 16,384
│    │    └─BatchNorm2d: 3-21            512
│    │    └─ReLU: 3-22                   --
├─Sequential: 1-6                        --
│    └─Bottleneck: 2-4                   --
│    │    └─Conv2d: 3-23                 32,768
│    │    └─BatchNorm2d: 3-24            256
│    │    └─Conv2d: 3-25                 147,456
│    │    └─BatchNorm2d: 3-26            256
│    │    └─Conv2d: 3-27                 65,536
│    │    └─BatchNorm2d: 3-28            1,024
│    │    └─ReLU: 3-29                   --
│    │    └─Sequential: 3-30             132,096
│    └─Bottleneck: 2-5                   --
│    │    └─Conv2d: 3-31                 65,536
│    │    └─BatchNorm2d: 3-32            256
│    │    └─Conv2d: 3-33                 147,456
│    │    └─BatchNorm2d: 3-34            256
│    │    └─Conv2d: 3-35                 65,536
│    │    └─BatchNorm2d: 3-36            1,024
│    │    └─ReLU: 3-37                   --
│    └─Bottleneck: 2-6                   --
│    │    └─Conv2d: 3-38                 65,536
│    │    └─BatchNorm2d: 3-39            256
│    │    └─Conv2d: 3-40                 147,456
│    │    └─BatchNorm2d: 3-41            256
│    │    └─Conv2d: 3-42                 65,536
│    │    └─BatchNorm2d: 3-43            1,024
│    │    └─ReLU: 3-44                   --
│    └─Bottleneck: 2-7                   --
│    │    └─Conv2d: 3-45                 65,536
│    │    └─BatchNorm2d: 3-46            256
│    │    └─Conv2d: 3-47                 147,456
│    │    └─BatchNorm2d: 3-48            256
│    │    └─Conv2d: 3-49                 65,536
│    │    └─BatchNorm2d: 3-50            1,024
│    │    └─ReLU: 3-51                   --
├─Sequential: 1-7                        --
│    └─Bottleneck: 2-8                   --
│    │    └─Conv2d: 3-52                 131,072
│    │    └─BatchNorm2d: 3-53            512
│    │    └─Conv2d: 3-54                 589,824
│    │    └─BatchNorm2d: 3-55            512
│    │    └─Conv2d: 3-56                 262,144
│    │    └─BatchNorm2d: 3-57            2,048
│    │    └─ReLU: 3-58                   --
│    │    └─Sequential: 3-59             526,336
│    └─Bottleneck: 2-9                   --
│    │    └─Conv2d: 3-60                 262,144
│    │    └─BatchNorm2d: 3-61            512
│    │    └─Conv2d: 3-62                 589,824
│    │    └─BatchNorm2d: 3-63            512
│    │    └─Conv2d: 3-64                 262,144
│    │    └─BatchNorm2d: 3-65            2,048
│    │    └─ReLU: 3-66                   --
│    └─Bottleneck: 2-10                  --
│    │    └─Conv2d: 3-67                 262,144
│    │    └─BatchNorm2d: 3-68            512
│    │    └─Conv2d: 3-69                 589,824
│    │    └─BatchNorm2d: 3-70            512
│    │    └─Conv2d: 3-71                 262,144
│    │    └─BatchNorm2d: 3-72            2,048
│    │    └─ReLU: 3-73                   --
│    └─Bottleneck: 2-11                  --
│    │    └─Conv2d: 3-74                 262,144
│    │    └─BatchNorm2d: 3-75            512
│    │    └─Conv2d: 3-76                 589,824
│    │    └─BatchNorm2d: 3-77            512
│    │    └─Conv2d: 3-78                 262,144
│    │    └─BatchNorm2d: 3-79            2,048
│    │    └─ReLU: 3-80                   --
│    └─Bottleneck: 2-12                  --
│    │    └─Conv2d: 3-81                 262,144
│    │    └─BatchNorm2d: 3-82            512
│    │    └─Conv2d: 3-83                 589,824
│    │    └─BatchNorm2d: 3-84            512
│    │    └─Conv2d: 3-85                 262,144
│    │    └─BatchNorm2d: 3-86            2,048
│    │    └─ReLU: 3-87                   --
│    └─Bottleneck: 2-13                  --
│    │    └─Conv2d: 3-88                 262,144
│    │    └─BatchNorm2d: 3-89            512
│    │    └─Conv2d: 3-90                 589,824
│    │    └─BatchNorm2d: 3-91            512
│    │    └─Conv2d: 3-92                 262,144
│    │    └─BatchNorm2d: 3-93            2,048
│    │    └─ReLU: 3-94                   --
├─Sequential: 1-8                        --
│    └─Bottleneck: 2-14                  --
│    │    └─Conv2d: 3-95                 524,288
│    │    └─BatchNorm2d: 3-96            1,024
│    │    └─Conv2d: 3-97                 2,359,296
│    │    └─BatchNorm2d: 3-98            1,024
│    │    └─Conv2d: 3-99                 1,048,576
│    │    └─BatchNorm2d: 3-100           4,096
│    │    └─ReLU: 3-101                  --
│    │    └─Sequential: 3-102            2,101,248
│    └─Bottleneck: 2-15                  --
│    │    └─Conv2d: 3-103                1,048,576
│    │    └─BatchNorm2d: 3-104           1,024
│    │    └─Conv2d: 3-105                2,359,296
│    │    └─BatchNorm2d: 3-106           1,024
│    │    └─Conv2d: 3-107                1,048,576
│    │    └─BatchNorm2d: 3-108           4,096
│    │    └─ReLU: 3-109                  --
│    └─Bottleneck: 2-16                  --
│    │    └─Conv2d: 3-110                1,048,576
│    │    └─BatchNorm2d: 3-111           1,024
│    │    └─Conv2d: 3-112                2,359,296
│    │    └─BatchNorm2d: 3-113           1,024
│    │    └─Conv2d: 3-114                1,048,576
│    │    └─BatchNorm2d: 3-115           4,096
│    │    └─ReLU: 3-116                  --
├─AdaptiveAvgPool2d: 1-9                 --
├─Linear: 1-10                           53,274
=================================================================
Total params: 23,561,306
Trainable params: 23,561,306
Non-trainable params: 0
=================================================================
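To also tabulate per-layer output shapes and multiply-adds, torchinfo's summary can be given an input size matching our 224x224 RGB batches (a sketch; this was not run here):

In [ ]:
# Hedged sketch: passing an input size makes torchinfo also report output shapes and MACs
summary(model_res_50, input_size=(1, 3, 224, 224))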

Real time recognition¶

In [ ]:
cap = cv2.VideoCapture(0) # Detect signs in real time

cap.set(3, 700) # setting the dimensions of the capture
cap.set(4, 480)

signs = class_names

while True:
    ret, frame = cap.read()

    img = frame[20:250, 20:250] # the image capture dimensions

    res = cv2.resize(img, dsize=(28, 28), interpolation = cv2.INTER_CUBIC) # resizing 
    res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY) # set the color to black and white

    res1 = np.reshape(res, (1, 1, 28, 28)) / 255 # reshape to a single-channel batch and scale to [0, 1]
    res1 = np.repeat(res1, 3, axis=1) # repeat the grey channel three times to mimic an RGB input
    res1 = torch.from_numpy(res1) # convert the numpy array to a torch tensor
    res1 = res1.type(torch.FloatTensor) # cast to a float tensor
    
    model_res_50.eval() # set model to evaluation mode

    out = model_res_50(res1) # get the raw class scores (logits)
    # convert the top scores into probabilities
    probs, label = torch.topk(out, 25)
    probs = torch.nn.functional.softmax(probs, 1)
    #print(out)

    wht, pred = torch.max(out, 1) # predict label
    #print(wht)
    #print(pred)

    if float(probs[0,0]) < 0.4:
        texto_mostrar = 'Sign not detected'
    else:
        texto_mostrar = signs[int(pred)] + ': ' + '{:.2%}'.format(float(probs[0,0]))

    font = cv2.FONT_HERSHEY_SIMPLEX
    frame = cv2.putText(frame, texto_mostrar, (60,285), font, 1, (255,0,0), 2, cv2.LINE_AA)

    frame = cv2.rectangle(frame, (20, 20), (250, 250), (0, 255, 0), 3)

    cv2.imshow('Cam', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
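Note that the crop above is fed to the network as a 28x28 grayscale image scaled to [0, 1], which differs from the 224x224 ImageNet-normalised inputs the models were trained on. A possible refinement (a sketch, assuming the eval_transform defined earlier is still in scope) is to preprocess the webcam crop the same way as the validation images:

In [ ]:
# Hedged sketch: preprocess the webcam crop with the same deterministic transform as the
# validation set (eval_transform above) instead of the 28x28 grayscale pipeline.
from PIL import Image

def preprocess_frame(img_bgr):
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # OpenCV frames are BGR
    tensor = eval_transform(Image.fromarray(img_rgb))   # resize, centre-crop, normalise
    return tensor.unsqueeze(0)                          # add the batch dimension

# possible usage inside the loop above, replacing the res/res1 lines:
# res1 = preprocess_frame(img)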