PyTorch

6 minute read

Published:

This lesson covers training a CNN on the CIFAR10 dataset.

CNN for CIFAR10

# Identifiers for this post; imshow() passes them to the figure helpers.
topic, lesson = "pytorch", 9

from n import *
# Look up the dataset root and the checkpoint directory for CIFAR10.
# NOTE(review): get_project_dir comes from the star import; presumably it
# returns (data_dir, models_dir) in this order - confirm against its definition.
home, models_path = get_project_dir("CIFAR10")

for _path in (home, models_path):
    print_(_path)

/home/naneja/datasets/n/CIFAR10

/home/naneja/datasets/n/CIFAR10/models

# Training hyperparameters.
# Linear scaling rule of thumb: if the minibatch size is multiplied by k,
# multiply the learning rate by k as well.
epochs = 10
batch_size = 128
learning_rate = 1e-2

Data

Training an image classifier

  • Load and normalize the CIFAR10 training and test datasets using torchvision

  • Define a Convolutional Neural Network

  • Define a loss function

  • Train the network on the training data

  • Test the network on the test data

Load and normalize CIFAR10

%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

from tqdm import tqdm

import matplotlib.pyplot as plt
import numpy as np
# Seed both RNGs so shuffling and weight initialisation are repeatable.
# NOTE(review): `seed` and `random` are not defined in the visible cells;
# presumably they come from the star import - confirm.
torch.manual_seed(seed)
random.seed(seed)

# Load the training split with only ToTensor applied so the raw pixel range
# can be inspected before normalization is introduced.
trainset = torchvision.datasets.CIFAR10(root=home,
                                        train=True,
                                        download=True,
                                        transform=transforms.ToTensor())

trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=2)

# Fetch one batch with the built-in next(): DataLoader iterators implement
# __next__ only, and the old `.next()` alias is not available in current
# PyTorch releases.
dataiter = iter(trainloader)
images, labels = next(dataiter)

print_(images.shape)

img = images[0]
print_(img.shape)

# Image values are in [0, 1] after ToTensor
print_(f"min = {torch.min(img):.4f}, max = {torch.max(img):.4f}")
Files already downloaded and verified

torch.Size([128, 3, 32, 32])

torch.Size([3, 32, 32])

min = 0.1412, max = 0.9686

Normalize the images

  • x ∈ [0, 1]
  • y = (x - 0.5)/0.5
    • x = 0 -> y = -1
    • x = 1 -> y = +1
# Per-channel statistics used to map [0, 1] pixels onto [-1, +1].
mean = std = (0.5, 0.5, 0.5)

tfs = [
    transforms.ToTensor(),
    transforms.Normalize(mean, std),  # (x - 0.5) / 0.5 -> [-1, +1]
]
transform = transforms.Compose(tfs)

# Both splits share the same root directory and preprocessing pipeline.
trainset = torchvision.datasets.CIFAR10(
    root=home, train=True, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR10(
    root=home, train=False, download=True, transform=transform
)

# Human-readable class names, indexed by the integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
Files already downloaded and verified
Files already downloaded and verified
# Batched loaders: the training split is reshuffled every epoch, the test
# split keeps its fixed order.
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=2)

testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=2)

# get some random training images
# (built-in next(): the iterator's `.next()` alias is not available in
# current PyTorch releases)
dataiter = iter(trainloader)
images, labels = next(dataiter)
print_(images.shape)

img = images[0]
print_(img.shape)

# First pixel row of the first channel, shown as the cell result
img[0][0]

torch.Size([128, 3, 32, 32])

torch.Size([3, 32, 32])

tensor([0.4039, 0.3961, 0.3961, 0.3882, 0.4118, 0.4353, 0.4431, 0.4431, 0.4588,
        0.4588, 0.4667, 0.4824, 0.4902, 0.4902, 0.5137, 0.5765, 0.6627, 0.6863,
        0.6471, 0.5843, 0.5373, 0.5216, 0.6078, 0.7020, 0.7569, 0.6392, 0.5294,
        0.4824, 0.5686, 0.6235, 0.5529, 0.6235])

UnNormalize the images

  • x ∈ [-1, 1]
  • y = (x/2) + 0.5
    • x = -1 -> y = 0
    • x = +1 -> y = 1
# functions to show an image

def imshow(img):
    """Display a normalized (C, H, W) image tensor and save it as a figure.

    The tensor is mapped back from [-1, 1] to [0, 1], converted to a NumPy
    array with the channel axis moved last, drawn without axes, written to
    the file name returned by ``get_img_name(lesson)``, handed to
    ``insert_image`` together with ``topic``, and finally shown.

    NOTE(review): ``get_img_name`` and ``insert_image`` are defined outside
    this file; their exact behaviour is not visible here.
    """
    unnormalized = img / 2 + 0.5  # inverse of (x - 0.5) / 0.5
    pixels = unnormalized.numpy().transpose(1, 2, 0)  # (C, H, W) -> (H, W, C)

    plt.imshow(pixels)
    plt.axis("off")

    img_name = get_img_name(lesson)
    plt.savefig(img_name)
    insert_image(img_name, topic)

    plt.show()
# get some random training images
# (built-in next(): the iterator's `.next()` alias is not available in
# current PyTorch releases)
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Class name of the first image in the batch, then the image itself
print_(classes[labels[0]])
imshow(images[0])

horse

png

# show images
print_(images.shape)

print_(images[0].shape)

n_grid = 4
grid = torchvision.utils.make_grid(images[:n_grid])

# make_grid puts a 2-pixel border around and between the tiles:
# width  = 32 * 4 image pixels + 5 * 2 border pixels = 138
# height = 32 image pixels + 2 * 2 border pixels = 36
print_(f"grid = {grid.shape}")

# Caption each tile with the class of its own label. Indexing `classes` by
# position (0..n_grid-1) would always print the first n_grid class names
# regardless of which images were drawn.
label_row = ''.join(f"{classes[label]:10s}" for label in labels[:n_grid])
print(label_row)

imshow(grid)

torch.Size([128, 3, 32, 32])

torch.Size([3, 32, 32])

grid = torch.Size([3, 36, 138])

plane     car       bird      cat       

png

Define a Convolutional Neural Network

$ n_{out} = \left\lfloor \frac{n_{in} + 2 \cdot \text{padding} - \text{kernel\_size}}{\text{stride}} \right\rfloor + 1 $

import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN for 3x32x32 images: two conv/pool stages and three linear layers.

    Spatial size per stage follows n_out = (n_in - kernel) / stride + 1
    (no padding anywhere):
        conv1 5x5: 32 -> 28, pool 2x2/2: 28 -> 14
        conv2 5x5: 14 -> 10, pool 2x2/2: 10 -> 5
    so the flattened feature vector has 16 * 5 * 5 = 400 entries.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0)
        # A single pooling module is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)

        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a (N, 3, 32, 32) batch to (N, 10) unnormalized class scores."""
        stage1 = F.relu(self.conv1(x))       # (N, 6, 28, 28)
        stage1 = self.pool(stage1)           # (N, 6, 14, 14)

        stage2 = F.relu(self.conv2(stage1))  # (N, 16, 10, 10)
        stage2 = self.pool(stage2)           # (N, 16, 5, 5)

        # Keep the batch dimension, flatten everything else.
        features = stage2.flatten(start_dim=1)  # (N, 400)

        hidden = F.relu(self.fc1(features))  # (N, 120)
        hidden = F.relu(self.fc2(hidden))    # (N, 84)

        # No activation on the last layer: the loss expects raw scores.
        return self.fc3(hidden)              # (N, 10)


net = Net()
print(net)
Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

Define a Loss function and optimizer

import torch.optim as optim

# Cross-entropy on the network's raw class scores, minimised with
# momentum SGD at the learning rate configured earlier.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=learning_rate, momentum=0.9)

Train the network

# Training: loop over the dataset `epochs` times and report the mean
# per-batch loss after each epoch.

net.train()

# With more than one GPU, DataParallel splits each batch along dim 0,
# e.g. [30, ...] -> three chunks of [10, ...] on 3 GPUs.
if torch.cuda.device_count() > 1:
    print_(f"Let's use {torch.cuda.device_count()} GPUs!")
    net = nn.DataParallel(net)
net.to(device)

for epoch in range(epochs):

    running_loss = 0.0

    # each batch from the loader is an [inputs, labels] pair
    for inputs, labels in tqdm(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # average over the number of batches in the epoch
    running_loss /= len(trainloader)
    print_(f"Epoch {epoch+1} Loss {running_loss:.3f}")

print('Finished Training')
100%|█████████████████████████████████████████| 391/391 [00:04<00:00, 84.74it/s]

Epoch 1 Loss 2.028

 80%|███████████████████████████████▉        | 312/391 [00:03<00:00, 102.02it/s]
PATH = f"{models_path}/cifar_net.pth"
print_(PATH)

# Always save the bare model's weights so the checkpoint keys carry no
# `module.` prefix. Test whether `net` is actually wrapped instead of
# counting GPUs: on a multi-GPU host a plain (unwrapped) Net has no
# `.module` attribute and the GPU-count check would raise AttributeError.
if isinstance(net, nn.DataParallel):
    torch.save(net.module.state_dict(), PATH)
else:
    torch.save(net.state_dict(), PATH)

Test the network on the test data

# First test batch (built-in next(): the iterator's `.next()` alias is not
# available in current PyTorch releases)
dataiter = iter(testloader)
images, labels = next(dataiter)

n_grid = 4
grid = torchvision.utils.make_grid(images[:n_grid])

# Caption each tile with the class of its own ground-truth label. Indexing
# `classes` by position would always print the first n_grid class names.
label_row = ''.join(f"{classes[label]:10s}" for label in labels[:n_grid])
print_(label_row)

imshow(grid)
# Start from a freshly initialised, unwrapped model and restore the trained
# weights from the checkpoint file.
net = Net()

print_(f"Loading {PATH}")

# The bare string below is not executed. It keeps an alternative loading
# recipe for checkpoints whose keys carry a `module.` prefix (i.e. saved
# directly from a DataParallel wrapper).
"""

# original saved file with DataParallel
state_dict = torch.load(PATH, map_location=device)

# create new OrderedDict that does not contain `module.`
from collections import OrderedDict
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    # remove `module.` if DataParallel
    name = k.replace("module.", "") #k[7:] 
    #print(k)
    new_state_dict[name] = v
    
# load params
net.load_state_dict(new_state_dict)
"""

# map_location places the stored tensors on `device` while loading.
net.load_state_dict(torch.load(PATH, 
                               map_location=device)
                   )
# First test batch (built-in next(): the iterator's `.next()` alias is not
# available in current PyTorch releases)
dataiter = iter(testloader)
images, labels = next(dataiter)

# Model and data must live on the same device
net.to(device)
images, labels = images.to(device), labels.to(device)

outputs = net(images)

# Predicted class = index of the largest score in each row
print_(labels[:10])
_, predicted = torch.max(outputs, 1)
print_(predicted[:10])

# Number of correct predictions in this batch
correct = (predicted == labels).sum().item()
print_(correct)
# Class names of the first four images: ground truth on the first line,
# the network's predictions on the second.
true_labels = " ".join(f"{classes[i]:10s}" for i in labels[:4])
print(true_labels)

pred_labels = " ".join(f"{classes[i]:10s}" for i in predicted[:4])
print(pred_labels)
# Accuracy over the whole test split.
net = net.to(device)
net.eval()

total = 0
correct = 0

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for images, labels in tqdm(testloader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = net(images)

        # the prediction is the class with the highest score
        predicted = outputs.max(dim=1).indices

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print_(f"Accuracy on the {total} test images: {correct/total:.2%}")