PyTorch
Published:
This lesson covers the PyTorch Tutorial: https://pytorch.org/tutorials/beginner/basics/intro.html
Optimizing Model Parameters
topic = "pytorch"
lesson = 7
from n import *
home, models_path = get_project_dir("FashionMNIST")
print_(home)
/home/naneja/datasets/n/FashionMNIST
%matplotlib inline
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import time
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from IPython.display import display, HTML
training_data = datasets.FashionMNIST(root=home,
                                      train=True,
                                      download=True,
                                      transform=ToTensor())
test_data = datasets.FashionMNIST(root=home,
                                  train=False,
                                  download=True,
                                  transform=ToTensor())
batch_size = 64
train_dataloader = DataLoader(training_data,
                              batch_size=batch_size,
                              shuffle=True)
test_dataloader = DataLoader(test_data,
                             batch_size=batch_size,
                             shuffle=True)
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
device = "cuda" if torch.cuda.is_available() else "cpu"  # used by the train/test loops below
model = NeuralNetwork().to(device)
Hyperparameters
Hyperparameters are adjustable parameters that let you control the model optimization process:
- Number of Epochs - the number of times to iterate over the dataset
- Batch Size - the number of data samples propagated through the network before the parameters are updated
- Learning Rate - how much to update the model's parameters at each batch/epoch. Smaller values yield a slower learning speed, while large values may result in unpredictable behavior during training (see the short sketch below).
learning_rate = 1e-3
batch_size = 64
epochs = 5
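For plain SGD, which is the optimizer used later in this lesson, the learning rate simply scales each parameter update. The following sketch only illustrates that update rule and is not part of the original tutorial; optimizer.step() performs the equivalent work for us.
# Illustrative only: the vanilla SGD rule, param <- param - learning_rate * grad
with torch.no_grad():
    for param in model.parameters():
        if param.grad is not None:  # gradients exist only after a backward() pass
            param -= learning_rate * param.grad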
Optimization Loop
- Once we set our hyperparameters, we can train and optimize our model with an optimization loop. Each iteration of the optimization loop is called an epoch.
- Each epoch consists of two main parts:
  - The Train Loop - iterate over the training dataset and try to converge to optimal parameters.
  - The Validation/Test Loop - iterate over the test dataset to check whether model performance is improving.
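Put together, the loop has the following shape. This is only a sketch: it uses the train and test helpers that are defined later in this lesson, and the loop is run for real at the end.
# Sketch: one train pass and one test pass per epoch
for t in range(epochs):
    train(train_dataloader, model, loss_fn, optimizer)  # Train Loop
    test(test_dataloader, model, loss_fn)               # Validation/Test Loop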
Loss Function
When presented with some training data, our untrained network is likely not to give the correct answer.
The loss function measures the degree of dissimilarity between the obtained result and the target value, and it is the loss function that we want to minimize during training.
To calculate the loss, we make a prediction using the inputs of a given data sample and compare it against the true label value.
- Common loss functions include:
  - nn.MSELoss (Mean Square Error) for regression tasks
  - nn.NLLLoss (Negative Log Likelihood) for classification
  - nn.CrossEntropyLoss, which combines nn.LogSoftmax and nn.NLLLoss
- We pass our model’s output logits to nn.CrossEntropyLoss, which will normalize the logits and compute the prediction error.
# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()
print_("loss_fn", loss_fn)
loss_fn
CrossEntropyLoss()
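As a quick illustration of the last point (not part of the original tutorial; the logits and targets below are made up), nn.CrossEntropyLoss applied to raw logits gives the same value as nn.LogSoftmax followed by nn.NLLLoss:
logits = torch.randn(4, 10)           # 4 samples, 10 class scores each
targets = torch.tensor([1, 0, 7, 3])  # true class indices
ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(torch.isclose(ce, nll))         # tensor(True)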
Optimizer
Optimization is the process of adjusting model parameters to reduce model error in each training step.
Common optimization algorithms include Stochastic Gradient Descent (SGD), Adam, and RMSProp.
We initialize the optimizer by registering the model’s parameters that need to be trained and passing in the learning rate hyperparameter.
In the training loop, optimization happens in three steps:
- Call optimizer.zero_grad() to reset the gradients of the model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration (the short demo after the next cell illustrates this).
- Backpropagate the prediction loss with a call to loss.backward(). PyTorch deposits the gradients of the loss w.r.t. each parameter.
- Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass.
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)
print_("optimizer", optimizer)
optimizer
SGD (
Parameter Group 0
dampening: 0
foreach: None
lr: 0.001
maximize: False
momentum: 0
nesterov: False
weight_decay: 0
)
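To see why zeroing matters, here is a tiny standalone demo; it is not from the tutorial, and the tensor w is just a made-up parameter. It shows gradients accumulating across backward() calls until they are reset.
w = torch.ones(3, requires_grad=True)
(w * 2).sum().backward()
print(w.grad)      # tensor([2., 2., 2.])
(w * 2).sum().backward()
print(w.grad)      # tensor([4., 4., 4.]) - the second backward() added to the first
w.grad.zero_()     # optimizer.zero_grad() does this for every registered parameter
print(w.grad)      # tensor([0., 0., 0.])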
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)  # 60000
    num_batches = len(dataloader)   # batches in the train dataloader, not the test one
    model = model.to(device)
    model.train()
    batch_loss, total_correct = 0., 0.
    for batch, (X, y) in enumerate(tqdm(dataloader)):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss and the number of correct predictions
        batch_loss += loss.item()
        correct = (pred.argmax(axis=1) == y).type(torch.float).sum().item()
        total_correct += correct

    total_correct /= size
    batch_loss /= num_batches
    msg = (f"Train_Accuracy = {total_correct:0.1%}{tab}"
           f"Train_Loss = {batch_loss:.3f}")
    print_(msg)
# Train One Epoch
train(train_dataloader, model, loss_fn, optimizer)
100%|████████████████████████████████████████| 938/938 [00:04<00:00, 199.92it/s]
Train_Accuracy = 31.9% Train_Loss = 13.344
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)  # 10000
    num_batches = len(dataloader)
    model.eval()
    batch_loss, total_correct = 0., 0.
    with torch.no_grad():
        for X, y in tqdm(dataloader):
            X, y = X.to(device), y.to(device)
            pred = model(X)
            batch_loss += loss_fn(pred, y).item()
            correct = (pred.argmax(1) == y).type(torch.float).sum().item()
            total_correct += correct
    batch_loss /= num_batches
    total_correct /= size
    msg = (f"Test_Accuracy={total_correct:0.1%}{tab}"
           f"Test_Loss={batch_loss:.3f}")
    print_(msg)
# Test One Epoch
test(test_dataloader, model, loss_fn)
46%|██████████████████▊ | 72/157 [00:00<00:00, 234.39it/s]
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)
epochs = 2
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    time.sleep(1)
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")