# -*- coding: utf-8 -*-
"""logistic_regression_pytorch.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1JGuyYYa187eO4VsO7dtAKdkWrs842-gY
"""
# import kagglehub  # only needed for the commented-out Kaggle download below
# Logistic regression is almost identical to a linear regression model, with weight and bias matrices: pred = x @ w.t() + b
## Since each image is (channels, height, width) = (1, 28, 28), nn.Linear expects a flattened vector of 28x28 = 784 values...
## Output is a vector of size 10, each element giving the probability of a particular target label (0 - 9). The predicted label is the one with the highest probability.
#https://www.kaggle.com/datasets/hojjatk/mnist-dataset
# Download latest version
# path = kagglehub.dataset_download("hojjatk/mnist-dataset")
# print("Path to dataset files:", path)
import torch
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
dataset = MNIST(root='data/', download=True)#, transform=transforms.ToTensor())
len(dataset)
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
import numpy as np
# %matplotlib inline
image, label = dataset[0]
plt.imshow(image, cmap='gray')
# plt.imshow(img.squeeze(), cmap='gray')
print('Label:', label)
image, label = dataset[10]
plt.imshow(image, cmap='gray')
# plt.imshow(img.squeeze(), cmap='gray')
print('Label:', label)
# transform converts images into tensors
dataset = MNIST(root='data/', download=True, transform=transforms.ToTensor())
img_tensor, label = dataset[0]
print(img_tensor.shape, 'Label:',label)
"""1. Training Set:_______compute loss & adjust weights of model using gradient descent
2. Validation Set:_____adjust hyperparameters, i.e. learning rate, and pick best version
3. Test Set: __________compare models
"""
print(img_tensor[:,10:15,10:15])
print(torch.max(img_tensor))
print(torch.min(img_tensor))
def split_indices(n, val_pct):
    n_val = int(n*val_pct)             # size of the validation set, e.g. n=60000 & val_pct=0.1
    idxs = np.random.permutation(n)    # create a random permutation of 0 to n-1
    return idxs[n_val:], idxs[:n_val]  # remaining indices for training, first n_val for validation
"""split_indices randomly shuffles the array indices and separates out a specified portion for validation (training images are often ordered by target label, i.e. images of 0s, followed by images of 1s, followed by 2s, ...)."""
train_indices, val_indices = split_indices(len(dataset), val_pct=0.2)
print(len(train_indices), len(val_indices))
print('Sample train indices', train_indices[:10])
print('Sample val indices', val_indices[:10])
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
batch_size = 100
#training
train_sampler = SubsetRandomSampler(train_indices)
train_loader = DataLoader(dataset, batch_size, sampler=train_sampler)
#validation
val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(dataset, batch_size, sampler=val_sampler)
import torch.nn as nn
input_size = 28 * 28
num_classes = 10
# Logistic regression model
model = nn.Linear(input_size, num_classes)
print(model.weight.shape)
model.weight
print (model.bias.shape)
model.bias
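# A minimal sketch (for illustration): nn.Linear computes exactly pred = x @ w.t() + b,
# the same formula as linear regression. demo_x is a hypothetical batch of 5
# already-flattened images.
demo_x = torch.randn(5, input_size)
manual_pred = demo_x @ model.weight.t() + model.bias
print('nn.Linear matches x @ w.t() + b:', torch.allclose(model(demo_x), manual_pred))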
# for images, labels in train_loader:
# print(labels)
# print(images.shape)
# # images = images.reshape(-1, 28*28)
# # print('reshape images.shape:', images.shape)
# outputs = model(images)
# break
## Without the reshape, model(images) raises an error because each batch has shape torch.Size([100, 1, 28, 28])
## Must flatten each 1x28x28 image into a vector of 784 values
class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        return out
model = MnistModel()
"""*Inside __init__ constructor method, is instantiated the weights and biases using nn.Linear,
*Inside forward(a,b) method which is invoked when passing a batch of ## inputs to model, it flattens out the input tensor, then passing to self.linear(xb)
*-1 allows to work with any batch size
"""
print(model.linear.weight.shape, model.linear.bias.shape) # .weight and .bias are now inside linear
list(model.parameters())
for images, labels in train_loader:
    # print(labels)
    # print(images.shape)
    outputs = model(images)
    break
print('outputs.shape : ', outputs.shape) # should be [100, 10]
print('Sample outputs:\n', outputs[:2].data)
# outputs contain some negative values, which can't be probabilities... so use softmax!
# Softmax
# exponentiates each output (making every value positive) and divides by the sum of the
# exponentials, so each row adds up to 1 and can be read as probabilities; the raw linear
# outputs are therefore logits, not probabilities (see the hand-computed version below).
import torch.nn.functional as F
probs = F.softmax(outputs, dim=1)
print('Sample probabilities:\n', probs[:2].data)
print('Sum: ', torch.sum(probs[0]).item())  # the probabilities in an output row add up to 1
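# A minimal sketch (for illustration): softmax computed by hand for the first output row.
# Exponentiating makes every value positive, and dividing by the sum makes the row add up
# to 1; this should match F.softmax up to floating-point precision.
manual_probs = torch.exp(outputs[0]) / torch.exp(outputs[0]).sum()
print('Manual softmax matches F.softmax:', torch.allclose(manual_probs, probs[0]))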
max_probs, preds = torch.max(probs, dim=1)
print(preds)
print(max_probs)
torch.sum(labels == preds) / len(labels)
# compare the predicted digits with the actual labels
# the accuracy is poor since the weights were initialized randomly
def accuracy1(labels_1, preds_2):
    return torch.sum(labels_1 == preds_2).item() / len(labels_1)

accuracy1(preds, labels)
"""Problems with accuracy is not differentiable function:
1. torch.max & == are both non-continuousand non-differentiable operations, so can't use accuracy for computing gradients w.r.t. weights and biases.
2. doen'st take into acccount actual proabilities predicted by model
So, not good loss function for classification probs, must use cross entropy, which takes the 10 outputs, and picks of the 10, the 1 correspoonding to highest probability. So take logarithm of highest.
preds close to one, lower the loss
"""
loss_fn = F.cross_entropy
# Loss for current batch of data
loss = loss_fn(outputs, labels)
print(loss)
"""Output is: tensor(2.292)
To interpret, take e^-2.29 = .1
"""
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
### 1. calculates the loss for a batch of data,
### 2. optionally performs a gradient descent update step if an optimizer is provided,
### 3. optionally computes a metric (e.g. accuracy) using the predictions and targets
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    preds = model(xb)
    loss = loss_func(preds, yb)
    if opt is not None:
        loss.backward()   # compute gradients
        opt.step()        # update parameters
        opt.zero_grad()   # reset gradients
    metric_result = None
    if metric is not None:  # compute metric
        metric_result = metric(preds, yb)
    return loss.item(), len(xb), metric_result
def evaluate(model, loss_fn, valid_dl, metric=None):
    with torch.no_grad():  # pass each batch through the model
        results = [loss_batch(model, loss_fn, xb, yb, metric=metric)
                   for xb, yb in valid_dl]
        losses, nums, metrics = zip(*results)  # separate losses, counts and metrics
        total = np.sum(nums)  # total size of the dataset
        avg_loss = np.sum(np.multiply(losses, nums)) / total  # weighted average loss
        avg_metric = None
        if metric is not None:  # weighted average of the metric across batches
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric
#
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.sum(preds == labels).item() / len(preds)
#
"""* Softmax not needed for outputs since it doesn't change the relative order of the results.
*because e^x is an increasing function (y1>y2,then e^y1>e^y2)
"""
val_loss, total, val_acc = evaluate(model, loss_fn, val_loader,metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(val_loss, val_acc))
# loss: 2.3
def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    for epoch in range(epochs):
        # Training: go through the data batch by batch, performing a gradient descent step on each
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt)
        # Evaluation
        result = evaluate(model, loss_fn, valid_dl, metric)
        val_loss, total, val_metric = result
        # Print progress
        if metric is None:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss: {:.4f}, {}: {:.4f}'.format(epoch+1, epochs, val_loss, metric.__name__, val_metric))
#
model = MnistModel()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# run fit four times below, for 20 epochs in total
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)
# validation loss after each of the 20 epochs (values from a previous run)
loss = [
1.8867,
1.5860,
1.3754,
1.2241,
1.1120,
1.0261,
0.9583,
0.9037,
0.8588,
0.8209,
0.7888,
0.7610,
0.7368,
0.7154,
0.6963,
0.6793,
0.6640,
0.6501,
0.6373,
0.6257,
]
plt.plot(loss, '-x')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss vs. No. of epochs');
# validation accuracy after each epoch (values from a previous run)
accuracies = [
0.6455,
0.7431,
0.7730,
0.7913,
0.8043,
0.8131,
0.8191,
0.8237,
0.8279,
0.8370,
0.8406,
0.8434,
0.8465,
0.8490,
0.8512,
0.8532,
0.8545,
0.8570,
0.8577
]
plt.plot(accuracies, '-x')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Accuracy vs. No. of epochs');
# 2.55
#test dataset
test_dataset = MNIST(root='data/', train=False, transform=transforms.ToTensor())
def predict_image(img, model):
    xb = img.unsqueeze(0)
    yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()
img, label = test_dataset[1839]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', PREDICTED: ', predict_image(img, model))
# label 2, predicted 8
img.unsqueeze(0).shape
"""img.unsqueeze adds another dimension at the beginning of the 1x28x28 tensor, making it a 1 x 1 x 28x28, which teh model views as a batch containing a single image."""
img, label = test_dataset[0]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))
# label 7, predicted 7
img, label = test_dataset[10]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))
# label 0, predicted 0
img, label = test_dataset[193]
plt.imshow(img[0], cmap='gray')
print('Label:', label, ', Predicted:', predict_image(img, model))
# label 9, predicted 9
test_loader = DataLoader(test_dataset, batch_size=200)
result = evaluate(model, loss_fn, test_loader, metric=accuracy)
test_loss, total, test_acc = result
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(test_loss, test_acc))
torch.save(model.state_dict(), 'mnist-logistic.pth')
model.state_dict()
# to load the model weights, instantiate a new object of class MnistModel and use the .load_state_dict method
model2 = MnistModel()
model2.load_state_dict(torch.load('mnist-logistic.pth'))
model2.state_dict()
# Sanity check to ensure the same loss & accuracy as the first model.
test_loss, total, test_acc = evaluate(model2, loss_fn, test_loader, metric=accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(test_loss, test_acc))
"""Saving weights and bias matrices to disk for later reuse and Avoid Training from Scratch!
.state_dict method returns an OrderedDict containing all the weights & bieas matrices
"""