import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from latex import latexify
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%matplotlib inline
%config InlineBackend.figure_format = "retina"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)
cpu
def generate_xor_dataset(size):
    # Four Gaussian blobs, one per quadrant; the label is the XOR of the coordinate signs
    centers = [[-3.5, 3.5], [-3.5, -3.5], [3.5, -3.5], [3.5, 3.5]]
    X, _ = make_blobs(n_samples = size, centers = centers, n_features = 2, random_state = 13, cluster_std = 2)
    X = torch.FloatTensor(X)
    y = (X[:, 0] > 0) ^ (X[:, 1] > 0)
    return X, y.float().view(-1, 1)
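As a quick sanity check (a throwaway sketch using the function above), points with mixed coordinate signs (quadrants II and IV) should get label 1, and points with matching signs (quadrants I and III) label 0:

X_demo, y_demo = generate_xor_dataset(8)
# Sign pattern of each point vs. its label: mixed signs -> 1, same signs -> 0
for point, label in zip(X_demo, y_demo):
    print(point.tolist(), "->", int(label.item()))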
Plotting the XOR Dataset
X, y = generate_xor_dataset(400)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.5, random_state = 42)

latexify(columns = 2, fig_width = 7)
plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, cmap = "viridis", s = 10, label = "Training Samples")
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_test, cmap = "viridis", s = 10, label = "Test Samples", alpha = 0.5)
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.title("XOR Dataset - Training and Testing Samples")
plt.legend()
plt.show()
plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, cmap = "viridis", s = 10, label = "Training Samples")
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.title("XOR Dataset - Training Samples")
plt.legend()
plt.show()
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_test, cmap = "viridis", s = 10, label = "Test Samples", alpha = 0.5)
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.title("XOR Dataset - Testing Samples")
plt.legend()
plt.show()
X_train.shape, X_test.shape, y_train.shape, y_test.shape
(torch.Size([200, 2]),
torch.Size([200, 2]),
torch.Size([200, 1]),
torch.Size([200, 1]))
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)
Model Architecture
class XORMLP(nn.Module):
    def __init__(self, hidden_layer1, hidden_layer2):
        super().__init__()
        self.lin1 = nn.Linear(2, hidden_layer1)
        self.lin2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.lin3 = nn.Linear(hidden_layer2, 1)

    def forward(self, x):
        x = torch.relu(self.lin1(x))
        x = torch.relu(self.lin2(x))
        logits = self.lin3(x)   # raw logits; BCEWithLogitsLoss applies the sigmoid
        return logits
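This is a tiny network. A quick count (a throwaway sketch, instantiating the class above with the hidden sizes used later) gives 20 trainable parameters:

tmp = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2)
# (2*3 + 3) + (3*2 + 2) + (2*1 + 1) = 9 + 8 + 3 = 20 parameters
print(sum(p.numel() for p in tmp.parameters()))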
Model Training with No Regularization
\[\text{Loss Function} = \text{Binary Cross Entropy with Logits} \] \[L_N = - [y_N \cdot \log({\sigma(\hat{y}_N)}) + (1 - y_N) \cdot \log({1 - \sigma(\hat{y}_N)})]\] \[ \sigma(z) = \frac{1}{1 + e^{-z}}\]
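Since the model returns raw logits, nn.BCEWithLogitsLoss folds the sigmoid into the loss. A small numerical check (a sketch; the tensors below are arbitrary examples) confirms it matches applying torch.sigmoid followed by ordinary binary cross-entropy:

z = torch.tensor([[0.5], [-1.2]])   # example logits
t = torch.tensor([[1.0], [0.0]])    # example targets
print(nn.BCEWithLogitsLoss()(z, t))
print(F.binary_cross_entropy(torch.sigmoid(z), t))   # same value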
model = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2).to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.03)

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 32, shuffle = True)

for epoch in range(10000):
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        logits = model(inputs)
        loss = loss_fn(logits, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += ((logits > 0) == labels).sum().item()   # logit > 0 <=> sigmoid(logit) > 0.5
        total += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    accuracy = correct / total
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch [{epoch + 1}/10000] | Loss: {epoch_loss} | Accuracy: {accuracy:.7f}")
Epoch [1000/10000] | Loss: 6.0668761372133826e-05 | Accuracy: 1.0000000
Inference from the Model on Test Samples
model.eval()

with torch.no_grad():
    logits = model(X_test)
    predictions = (logits > 0).float()
    accuracy = (predictions == y_test).sum().item() / y_test.size(0)

print(f"Test Accuracy: {accuracy:.7f}")
Test Accuracy: 0.9800000
Plotting the Decision Surface
def plot_decision_boundary(X_train, X_test, y_train, y_test, model, title):
    X = torch.cat([X_train, X_test], dim = 0).cpu().numpy()
    y = torch.cat([y_train, y_test], dim = 0).cpu().numpy()

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    # Evaluate the model on a dense grid and threshold the logits at 0
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
    with torch.no_grad():
        Z = model(torch.FloatTensor(np.c_[xx.ravel(), yy.ravel()]).to(device))
    Z = (Z > 0).float().cpu().numpy().reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap = plt.cm.Paired, alpha = 0.4)

    plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, cmap = "viridis", s = 10)
    plt.scatter(X_test[:, 0], X_test[:, 1], c = y_test, cmap = "viridis", s = 10, alpha = 0.5)

    yellow_patch = mpatches.Patch(color = "yellow", label = "Class 1")
    blue_patch = mpatches.Patch(color = "darkblue", label = "Class 2")
    pink_patch = mpatches.Patch(color = "darksalmon", label = "Predict Class 1")
    lblue_patch = mpatches.Patch(color = "lightblue", label = "Predict Class 2")
    plt.legend(handles = [yellow_patch, blue_patch, pink_patch, lblue_patch], loc = "upper center", bbox_to_anchor = (0.5, 1.25), ncol = 2, fancybox = True, shadow = True)

    plt.title(title)
    plt.xlabel(r"$x_1$")
    plt.ylabel(r"$x_2$")
    plt.show()
Decision Boundary Before Training
latexify(columns = 2, fig_width = 9)
untrained_model = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2).to(device)   # a fresh, untrained model for comparison
plot_decision_boundary(X_train, X_test, y_train, y_test, untrained_model, "XOR Dataset - Decision Boundary - Untrained MLP")
Decision Boundary After Training
latexify(columns = 2, fig_width = 9)
plot_decision_boundary(X_train, X_test, y_train, y_test, model, "XOR Dataset - Decision Boundary - MLP with No Regularization")
Model Architecture for \(L_1\) Regularization
The architecture is unchanged: the same XORMLP class is re-instantiated with fresh weights; only the training loss gains an \(L_1\) penalty.
Model Training with \(L_1\) Regularization
\[\text{Loss Function} = \text{Binary Cross Entropy with Logits} + \beta \cdot \sum_{i} \|w_i\|_1\] \[L_N = - [y_N \cdot \log({\sigma(\hat{y}_N)}) + (1 - y_N) \cdot \log({1 - \sigma(\hat{y}_N)})] + \beta \cdot \sum_{i} \|w_i\|_1\] \[ \sigma(z) = \frac{1}{1 + e^{-z}}\]
Here \(\|w_i\|_1\) is the sum of the absolute values of the entries of parameter tensor \(w_i\), which is exactly what torch.norm(param, 1) computes in the loop below.
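A one-line check of that identity (a throwaway sketch with an arbitrary tensor):

w = torch.tensor([[1.0, -2.0], [0.5, 0.0]])
print(torch.norm(w, 1), w.abs().sum())   # both 3.5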
model = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2).to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.03)

# L1 Regularization Penalty Coefficient
beta = 0.001

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 32, shuffle = True)

for epoch in range(10000):
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        logits = model(inputs)
        loss = loss_fn(logits, labels)

        # Add beta * sum_i ||w_i||_1 over all parameter tensors
        l1_regularization = 0.0
        for param in model.parameters():
            l1_regularization += torch.norm(param, 1)
        loss += beta * l1_regularization

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += ((logits > 0) == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    accuracy = correct / total
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch [{epoch + 1}/10000] | Loss: {epoch_loss} | Accuracy: {accuracy:.7f}")
Epoch [1000/10000] | Loss: 0.0748444204883916 | Accuracy: 0.9900000
model.eval()

with torch.no_grad():
    logits = model(X_test)
    predictions = (logits > 0).float()
    accuracy = (predictions == y_test).sum().item() / y_test.size(0)

print(f"Test Accuracy: {accuracy:.7f}")
Test Accuracy: 0.9900000
latexify(columns = 2, fig_width = 9)
untrained_model = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2).to(device)   # a fresh, untrained model for comparison
plot_decision_boundary(X_train, X_test, y_train, y_test, untrained_model, "XOR Dataset - Decision Boundary - Untrained MLP")
latexify(columns = 2, fig_width = 9)
plot_decision_boundary(X_train, X_test, y_train, y_test, model, r"XOR Dataset - Decision Boundary - MLP with $L_1$ Regularization")
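One way to see the sparsity the \(L_1\) penalty encourages is to count near-zero weights in the trained model (a hypothetical inspection sketch; the 1e-3 threshold is arbitrary):

with torch.no_grad():
    for name, param in model.named_parameters():
        near_zero = (param.abs() < 1e-3).float().mean().item()
        print(f"{name}: {near_zero:.0%} of entries below 1e-3 in magnitude")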
Model Architecture for \(L_2\) Regularization
Again the same XORMLP architecture is re-instantiated with fresh weights; only the penalty term in the loss changes.
Model Training with \(L_2\) Regularization
\[\text{Loss Function} = \text{Binary Cross Entropy with Logits} + \beta \cdot \sum_{i} \|w_i\|_2\] \[L_N = - [y_N \cdot \log({\sigma(\hat{y}_N)}) + (1 - y_N) \cdot \log({1 - \sigma(\hat{y}_N)})] + \beta \cdot \sum_{i} \|w_i\|_2\] \[ \sigma(z) = \frac{1}{1 + e^{-z}}\]
Here \(\|w_i\|_2\) is the (non-squared) Euclidean/Frobenius norm of parameter tensor \(w_i\), matching torch.norm(param, 2) in the loop below; note this differs from classic weight decay, which penalizes the squared norm \(\|w_i\|_2^2\).
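A quick check of both quantities (a sketch with an arbitrary tensor; the weight_decay line is a hypothetical alternative, not what this notebook uses):

w = torch.tensor([3.0, 4.0])
print(torch.norm(w, 2))   # 5.0 — the penalty used here
print((w ** 2).sum())     # 25.0 — the squared norm that optimizer-level weight decay effectively penalizes (up to a constant factor)
# hypothetical optimizer-level variant of squared-norm decay:
# optimizer = torch.optim.Adam(model.parameters(), lr = 0.03, weight_decay = beta)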
model = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2).to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.03)

# L2 Regularization Penalty Coefficient
beta = 0.001

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 32, shuffle = True)

for epoch in range(10000):
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        logits = model(inputs)
        loss = loss_fn(logits, labels)

        # Add beta * sum_i ||w_i||_2 over all parameter tensors
        l2_regularization = 0.0
        for param in model.parameters():
            l2_regularization += torch.norm(param, 2)
        loss += beta * l2_regularization

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += ((logits > 0) == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    accuracy = correct / total
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch [{epoch + 1}/10000] | Loss: {epoch_loss} | Accuracy: {accuracy:.7f}")
Epoch [1000/10000] | Loss: 0.021696899086236954 | Accuracy: 0.9950000
model.eval()

with torch.no_grad():
    logits = model(X_test)
    predictions = (logits > 0).float()
    accuracy = (predictions == y_test).sum().item() / y_test.size(0)

print(f"Test Accuracy: {accuracy:.7f}")
Test Accuracy: 0.9850000
latexify(columns = 2, fig_width = 9)
untrained_model = XORMLP(hidden_layer1 = 3, hidden_layer2 = 2).to(device)   # a fresh, untrained model for comparison
plot_decision_boundary(X_train, X_test, y_train, y_test, untrained_model, "XOR Dataset - Decision Boundary - Untrained MLP")
latexify(columns = 2, fig_width = 9)
plot_decision_boundary(X_train, X_test, y_train, y_test, model, r"XOR Dataset - Decision Boundary - MLP with $L_2$ Regularization")
Logistic Regression using Polynomial Features \([x_1, x_2, x_1^2, x_1 x_2, x_2^2]\)
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import accuracy_score
poly = PolynomialFeatures(degree = 2, include_bias = False)
X_train_poly = poly.fit_transform(X_train.cpu().numpy())
X_test_poly = poly.transform(X_test.cpu().numpy())

logreg = LogisticRegression()
logreg.fit(X_train_poly, y_train.cpu().numpy().ravel())   # ravel to 1-D to avoid sklearn's column-vector warning

y_train_pred = logreg.predict(X_train_poly)
y_test_pred = logreg.predict(X_test_poly)

train_accuracy = accuracy_score(y_train.cpu().numpy(), y_train_pred)
test_accuracy = accuracy_score(y_test.cpu().numpy(), y_test_pred)

print("Train Accuracy: ", train_accuracy)
print("Test Accuracy: ", test_accuracy)
Train Accuracy: 1.0
Test Accuracy: 0.98
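To see exactly which features the quadratic expansion produces (a throwaway sketch; get_feature_names_out assumes scikit-learn >= 1.0):

demo = PolynomialFeatures(degree = 2, include_bias = False)
print(demo.fit_transform([[2.0, 3.0]]))           # [[2. 3. 4. 6. 9.]]
print(demo.get_feature_names_out(["x1", "x2"]))   # ['x1' 'x2' 'x1^2' 'x1 x2' 'x2^2']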
Plotting Decision Surface for Logistic Regression
def plot_decision_boundary(X_train, X_test, y_train, y_test, model, title):
    # Redefined for the sklearn model: inputs are the polynomial feature
    # matrices, whose first two columns are still the raw x1 and x2
    X = np.concatenate([X_train, X_test], axis = 0)
    y = np.concatenate([y_train, y_test], axis = 0)

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    # Expand the grid points with the same polynomial features before predicting
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
    poly = PolynomialFeatures(degree = 2, include_bias = False)
    meshgrid_poly = poly.fit_transform(np.c_[xx.ravel(), yy.ravel()])

    Z = model.predict(meshgrid_poly)
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap = plt.cm.Paired, alpha = 0.4)

    plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, cmap = "viridis", s = 10)
    plt.scatter(X_test[:, 0], X_test[:, 1], c = y_test, cmap = "viridis", s = 10, alpha = 0.5)

    yellow_patch = mpatches.Patch(color = "yellow", label = "Class 1")
    blue_patch = mpatches.Patch(color = "darkblue", label = "Class 2")
    pink_patch = mpatches.Patch(color = "darksalmon", label = "Predict Class 1")
    lblue_patch = mpatches.Patch(color = "lightblue", label = "Predict Class 2")
    plt.legend(handles = [yellow_patch, blue_patch, pink_patch, lblue_patch], loc = "upper center", bbox_to_anchor = (0.5, 1.25), ncol = 2, fancybox = True, shadow = True)

    plt.title(title)
    plt.xlabel(r"$x_1$")
    plt.ylabel(r"$x_2$")
    plt.show()
"XOR Dataset - Decision Boundary - Logistic Regression") plot_decision_boundary(X_train_poly, X_test_poly, y_train, y_test, logreg,