PyTorch

4 minute read

Published:

This lesson covers the official PyTorch Basics tutorial, https://pytorch.org/tutorials/beginner/basics/intro.html

Optimizing Model Parameters

topic = "pytorch"
lesson = 7

# `n` is the author's helper module; it provides get_project_dir, print_, and the tab string used below
from n import *
home, models_path = get_project_dir("FashionMNIST")
print_(home)

/home/naneja/datasets/n/FashionMNIST

%matplotlib inline
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import time

import torch
from torch import nn

from torchvision import datasets
from torchvision.transforms import ToTensor

from torch.utils.data import DataLoader

from IPython.display import display, HTML

training_data = datasets.FashionMNIST(root=home,
                                     train=True,
                                     download=True,
                                     transform=ToTensor())

test_data = datasets.FashionMNIST(root=home, 
                                  train=False,
                                  download=True,
                                  transform=ToTensor())

batch_size = 64

train_dataloader = DataLoader(training_data,
                              batch_size=batch_size,
                              shuffle=True)

test_dataloader = DataLoader(test_data,
                             batch_size=batch_size,
                             shuffle=True)
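
As a quick sanity check (my addition, not part of the tutorial): the two splits hold 60,000 and 10,000 images, and each batch drawn from the train loader is 64 grayscale 28x28 images with their integer labels.

print(len(training_data), len(test_data))  # 60000 10000
X, y = next(iter(train_dataloader))
print(X.shape, y.shape)  # torch.Size([64, 1, 28, 28]) torch.Size([64])
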
class NeuralNetwork(nn.Module):
    def __init__(self):
        
        super(NeuralNetwork, self).__init__()
        
        self.flatten = nn.Flatten()
        
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
# The train/test loops below use `device`; define it here (it may also come from the helper module)
device = "cuda" if torch.cuda.is_available() else "cpu"

model = NeuralNetwork().to(device)
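
A minimal check (my addition): a dummy batch pushed through the model comes back as one row of 10 raw logits per image.

X = torch.rand(3, 1, 28, 28, device=device)  # fake batch of 3 images
logits = model(X)
print(logits.shape)  # torch.Size([3, 10])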

Hyperparameters

  • adjustable parameters that let you control the model optimization process

  • Number of Epochs - the number of times to iterate over the dataset

  • Batch Size - the number of data samples propagated through the network before the parameters are updated

  • Learning Rate - how much to update the model's parameters at each batch/epoch

    • Smaller values mean slower learning, while larger values may result in unpredictable behavior during training

learning_rate = 1e-3
batch_size = 64
epochs = 5
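
With these values, one pass over the training set takes ceil(60000 / 64) = 938 optimizer updates, which matches the tqdm progress bars further below (my arithmetic, not from the tutorial).

import math
print(math.ceil(60000 / 64))  # 938 parameter updates per epoch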

Optimization Loop

  • Once we set our hyperparameters, we can then train and optimize our model with an optimization loop.
  • Each iteration of the optimization loop is called an epoch

  • Each epoch consists of two main parts (see the outline after this list):
    • The Train Loop - iterate over the training dataset and try to converge to optimal parameters.
    • The Validation/Test Loop - iterate over the test dataset to check if model performance is improving.
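
In outline, the loop run at the end of this lesson looks like the sketch below; train and test are the functions defined later on.

for t in range(epochs):
    train(train_dataloader, model, loss_fn, optimizer)  # train loop
    test(test_dataloader, model, loss_fn)               # validation/test loop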

Loss Function

  • When presented with some training data, our untrained network is likely not to give the correct answer

  • The loss function measures the degree of dissimilarity between the obtained result and the target value, and it is the loss function that we want to minimize during training

  • To calculate the loss, we make a prediction on the inputs of a given data sample and compare it against the true label

  • Common loss functions include
    • nn.MSELoss (Mean Square Error) for regression tasks
    • nn.NLLLoss (Negative Log Likelihood) for classification
    • nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss (a quick check of this follows the cell below)
  • We pass our model’s output logits to nn.CrossEntropyLoss, which will normalize the logits and compute the prediction error

# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()
print_("loss_fn", loss_fn)

loss_fn

CrossEntropyLoss()
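
A small check (my addition) that nn.CrossEntropyLoss really does combine nn.LogSoftmax and nn.NLLLoss:

logits = torch.randn(4, 10)          # fake raw model outputs for 4 samples
target = torch.tensor([0, 3, 1, 9])  # true class indices
a = nn.CrossEntropyLoss()(logits, target)
b = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
print(torch.allclose(a, b))  # True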

Optimizer

  • Optimization is the process of adjusting model parameters to reduce model error in each training step

  • Common optimizers include Stochastic Gradient Descent (SGD), Adam, and RMSProp; this lesson uses SGD

  • We initialize the optimizer by registering the model's parameters that need to be trained and passing in the learning rate hyperparameter

  • In training loop:

    • Call optimizer.zero_grad() to reset the gradients of model parameters
      • Gradients by default add up
      • To prevent double-counting, we explicitly zero them at each iteration (see the small demo after the optimizer cell below)
    • Backpropagate the prediction loss with a call to loss.backward()
      • PyTorch deposits the gradients of the loss w.r.t. each parameter
    • Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass

optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)

print_("optimizer", optimizer)

optimizer

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)
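
A minimal illustration (my addition) of why optimizer.zero_grad() matters: each backward() call adds to whatever is already stored in .grad.

w = torch.ones(3, requires_grad=True)
(w * 2).sum().backward()
print(w.grad)   # tensor([2., 2., 2.])
(w * 2).sum().backward()
print(w.grad)   # tensor([4., 4., 4.]) -> gradients accumulated
w.grad.zero_()  # roughly what optimizer.zero_grad() does for every parameter
print(w.grad)   # tensor([0., 0., 0.])
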
def train(dataloader, model, loss_fn, optimizer):

    num_batches = len(dataloader) # 938
    size = len(dataloader.dataset) # 60000
    
    model = model.to(device)
    model.train()
    
    batch_loss, total_correct = 0., 0.
    
    for batch, (X, y) in enumerate(tqdm(dataloader)):

        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss += loss.item()

        correct = (pred.argmax(axis=1) == y)
        correct = correct.type(torch.float)
        correct = correct.sum()
        correct = correct.item()
        total_correct += correct

    total_correct /= size
    batch_loss /= num_batches
    
    msg = (f"Train_Accuracy = {total_correct:0.1%}{tab}"
           f"Train_Loss = {batch_loss:.3f}")
    print_(msg)

# Train One Epoch
train(train_dataloader, model, loss_fn, optimizer)
100%|████████████████████████████████████████| 938/938 [00:04<00:00, 199.92it/s]

Train_Accuracy = 31.9%  Train_Loss = 13.344

def test(dataloader, model, loss_fn):
    
    num_batches = len(dataloader) # 157
    size = len(dataloader.dataset) # 10000
    
    model.eval() 
    
    batch_loss, total_correct = 0, 0
    
    with torch.no_grad():
        for X, y in tqdm(dataloader):
            X, y = X.to(device), y.to(device)
            pred = model(X)
            
            loss = loss_fn(pred, y).item()
            batch_loss += loss
            
            correct = (pred.argmax(1) == y)
            correct = correct.type(torch.float)
            correct = correct.sum()
            correct = correct.item()
            total_correct += correct
            
    batch_loss /= num_batches
    total_correct /= size
    
    msg = (f"Test_Accuracy={total_correct:0.1%}{tab}"
           f"Test_Loss={batch_loss:.3f}")
    print_(msg)

# Test One Epoch
test(test_dataloader, model, loss_fn)
 46%|██████████████████▊                      | 72/157 [00:00<00:00, 234.39it/s]

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=learning_rate)

epochs = 2
for t in range(epochs):
    
    print(f"Epoch {t+1}\n-------------------------------")
    time.sleep(1)
    train(train_dataloader, model, loss_fn, optimizer)
    
    test(test_dataloader, model, loss_fn)
    
print("Done!")