# -*- coding: utf-8 -*-
"""logistic_regression_pytorch.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1JGuyYYa187eO4VsO7dtAKdkWrs842-gY
"""
# import kagglehub  # only needed for the commented-out Kaggle download below
# Logistic regression is almost identical to a linear regression model, with weight and bias matrices: pred = x @ w.t() + b
## Since each image is (channels, height, width) = (1, 28, 28), nn.Linear expects a flattened vector of 28x28 = 784 values...
## Output is a vector of size 10, each element giving the probability of a particular target label (0 - 9). The predicted label is the one with the highest probability.
#https://www.kaggle.com/datasets/hojjatk/mnist-dataset
# Download latest version
# path = kagglehub.dataset_download("hojjatk/mnist-dataset")
# print("Path to dataset files:", path)
import torch
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
dataset = MNIST(root='data/', download=True)#, transform=transforms.ToTensor())
len(dataset)
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
import numpy as np
# %matplotlib inline
image, label = dataset[0]
plt.imshow(image, cmap='gray')
# plt.imshow(img.squeeze(), cmap='gray')
print('Label:', label)
image, label = dataset[10]
plt.imshow(image, cmap='gray')
# plt.imshow(img.squeeze(), cmap='gray')
print('Label:', label)
# transform converts images into tensors
dataset = MNIST(root='data/', download=True, transform=transforms.ToTensor())
img_tensor, label = dataset[0]
print(img_tensor.shape, 'Label:',label)
"""1. Training Set:_______compute loss & adjust weights of model using gradient descent
2. Validation Set:_____adjust hyperparameters, i.e. learning rate, and pick best version
3. Test Set: __________compare models
"""
print(img_tensor[:,10:15,10:15])
print(torch.max(img_tensor))
print(torch.min(img_tensor))
def split_indices(n, val_pct):
    n_val = int(n*val_pct)             # size of the validation set, e.g. n=60000 & val_pct=0.1
    idxs = np.random.permutation(n)    # create a random permutation of 0 to n-1
    return idxs[n_val:], idxs[:n_val]  # remaining indices for training, first n_val for validation
"""split_indices randomly shuffles the array indices and separates out a specified portion for validation (training images are often ordered by target label, i.e. images of 0s, followed by images of 1s, followed by 2s, ...)."""
train_indices, val_indices = split_indices(len(dataset), val_pct=0.2)
print(len(train_indices), len(val_indices))
print('Sample train indices', train_indices[:10])
print('Sample val indices', val_indices[:10])
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
batch_size = 100
#training
train_sampler = SubsetRandomSampler(train_indices)
train_loader = DataLoader(dataset, batch_size, sampler=train_sampler)
#validation
val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(dataset, batch_size, sampler=val_sampler)
import torch.nn as nn
input_size = 28 * 28
num_classes = 10
# Logistic regression model
model = nn.Linear(input_size, num_classes)
print(model.weight.shape)
model.weight
print (model.bias.shape)
model.bias
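# A minimal sketch (for illustration): nn.Linear computes exactly pred = x @ w.t() + b,
# the same formula as linear regression. demo_x is a hypothetical batch of 5
# already-flattened images.
demo_x = torch.randn(5, input_size)
manual_pred = demo_x @ model.weight.t() + model.bias
print('nn.Linear matches x @ w.t() + b:', torch.allclose(model(demo_x), manual_pred))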
# for images, labels in train_loader:
# print(labels)
# print(images.shape)
# # images = images.reshape(-1, 28*28)
# # print('reshape images.shape:', images.shape)
# outputs = model(images)
# break
## Without the reshape, model(images) raises an error because each batch has shape torch.Size([100, 1, 28, 28])
## Must flatten each 1x28x28 image into a vector of 784 values
class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        return out
model = MnistModel()
"""*Inside __init__ constructor method, is instantiated the weights and biases using nn.Linear,
*Inside forward(a,b) method which is invoked when passing a batch of ## inputs to model, it flattens out the input tensor, then passing to self.linear(xb)
*-1 allows to work with any batch size
"""
print(model.linear.weight.shape, model.linear.bias.shape) # .weight and .bias are now inside linear
list(model.parameters())
for images, labels in train_loader:
    # print(labels)
    # print(images.shape)
    outputs = model(images)
    break
print('outputs.shape : ', outputs.shape) # should be [100, 10]
print('Sample outputs:\n', outputs[:2].data)
# outputs contain some negative values, which can't be probabilities... so use softmax!
# Softmax
# exponentiates each output (making every value positive) and divides by the sum of the
# exponentials, so each row adds up to 1 and can be read as probabilities; the raw linear
# outputs are therefore logits, not probabilities (see the hand-computed version below).
import torch.nn.functional as F
probs = F.softmax(outputs, dim=1)
print('Sample probabilities:\n', probs[:2].data)
print('Sum: ', torch.sum(probs[0]).item())  # the probabilities in an output row add up to 1
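# A minimal sketch (for illustration): softmax computed by hand for the first output row.
# Exponentiating makes every value positive, and dividing by the sum makes the row add up
# to 1; this should match F.softmax up to floating-point precision.
manual_probs = torch.exp(outputs[0]) / torch.exp(outputs[0]).sum()
print('Manual softmax matches F.softmax:', torch.allclose(manual_probs, probs[0]))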
max_probs, preds = torch.max(probs, dim=1)
print(preds)
print(max_probs)
torch.sum(labels == preds) / len(labels)
# compare the predicted digits with the actual labels
# the accuracy is poor since the weights were initialized randomly
def accuracy1(labels_1, preds_2):
    return torch.sum(labels_1 == preds_2).item() / len(labels_1)

accuracy1(preds, labels)
"""Problems with accuracy is not differentiable function:
1. torch.max & == are both non-continuousand non-differentiable operations, so can't use accuracy for computing gradients w.r.t. weights and biases.
2. doen'st take into acccount actual proabilities predicted by model
So, not good loss function for classification probs, must use cross entropy, which takes the 10 outputs, and picks of the 10, the 1 correspoonding to highest probability. So take logarithm of highest.
preds close to one, lower the loss
"""
loss_fn = F.cross_entropy
# Loss for current batch of data
loss = loss_fn(outputs, labels)
print(loss)
"""Output is: tensor(2.292)
To interpret, take e^-2.29 = .1
"""
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
### 1. calculates the loss for a batch of data,
### 2. optionally performs a gradient descent update step if an optimizer is provided,
### 3. optionally computes a metric (e.g. accuracy) using the predictions and targets
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    preds = model(xb)
    loss = loss_func(preds, yb)
    if opt is not None:
        loss.backward()   # compute gradients
        opt.step()        # update parameters
        opt.zero_grad()   # reset gradients
    metric_result = None
    if metric is not None:  # compute metric
        metric_result = metric(preds, yb)
    return loss.item(), len(xb), metric_result
def evaluate(model, loss_fn, valid_dl, metric=None):
    with torch.no_grad():  # pass each batch through the model
        results = [loss_batch(model, loss_fn, xb, yb, metric=metric)
                   for xb, yb in valid_dl]
        losses, nums, metrics = zip(*results)  # separate losses, counts and metrics
        total = np.sum(nums)  # total size of the dataset
        avg_loss = np.sum(np.multiply(losses, nums)) / total  # weighted average loss
        avg_metric = None
        if metric is not None:  # weighted average of the metric across batches
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric
#
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.sum(preds == labels).item() / len(preds)
#
"""* Softmax not needed for outputs since it doesn't change the relative order of the results.
*because e^x is an increasing function (y1>y2,then e^y1>e^y2)
"""
val_loss, total, val_acc = evaluate(model, loss_fn, val_loader,metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(val_loss, val_acc))
# loss: 2.3
def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    for epoch in range(epochs):
        # Training: go through the data batch by batch, performing a gradient descent step on each
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt)
        # Evaluation
        result = evaluate(model, loss_fn, valid_dl, metric)
        val_loss, total, val_metric = result
        # Print progress
        if metric is None:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss: {:.4f}, {}: {:.4f}'.format(epoch+1, epochs, val_loss, metric.__name__, val_metric))
#
model = MnistModel()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# run fit four times below, for 20 epochs in total
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
# validation loss after each of the 20 epochs (values from a previous run)
loss = [
1.8867,
1.5860,
1.3754,
1.2241,
1.1120,
1.0261,
0.9583,
0.9037,
0.8588,
0.8209,
0.7888,
0.7610,
0.7368,
0.7154,
0.6963,
0.6793,
0.6640,
0.6501,
0.6373,
0.6257,
]
plt.plot(loss, '-x')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss vs. No. of epochs');
# validation accuracy after each epoch (values from a previous run)
accuracies = [
0.6455,
0.7431,
0.7730,
0.7913,
0.8043,
0.8131,
0.8191,
0.8237,
0.8279,
0.8370,
0.8406,
0.8434,
0.8465,
0.8490,
0.8512,
0.8532,
0.8545,
0.8570,
0.8577
]
plt.plot(accuracies, '-x')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Accuracy vs. No. of epochs');
# 2.55
#test dataset
test_dataset = MNIST(root='data/', train=False, transform=transforms.ToTensor())
def predict_image(img, model):
    xb = img.unsqueeze(0)
    yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()
img, label = test_dataset[1839]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', PREDICTED: ', predict_image(img, model))
# label 2, predicted 8
img.unsqueeze(0).shape
"""img.unsqueeze adds another dimension at the beginning of the 1x28x28 tensor, making it a 1 x 1 x 28x28, which teh model views as a batch containing a single image."""
img, label = test_dataset[0]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))
# label 7, predicted 7
img, label = test_dataset[10]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))
# label 0, predicted 0
img, label = test_dataset[193]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))
# label 9, predicted 9
test_loader = DataLoader(test_dataset, batch_size=200)
result = evaluate(model, loss_fn, test_loader, metric=accuracy)
test_loss, total, test_acc = result
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(test_loss, test_acc))
torch.save(model.state_dict(), 'mnist-logistic.pth')
model.state_dict()
# to load the model weights, instantiate a new object of class MnistModel and use the .load_state_dict method
model2 = MnistModel()
model2.load_state_dict(torch.load('mnist-logistic.pth'))
model2.state_dict()
# Sanity check to ensure the same loss & accuracy as the first model.
test_loss, total, test_acc = evaluate(model2, loss_fn, test_loader, metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(test_loss, test_acc))
"""Saving weights and bias matrices to disk for later reuse and Avoid Training from Scratch!
.state_dict method returns an OrderedDict containing all the weights & bieas matrices
"""