I am trying to rewrite a very simple Keras model in PyTorch, but I could not reproduce the exact result in PyTorch.
keras code:
import keras
import tensorflow as tf
import torch
from keras.layers import Input
from keras.layers.core import Dense
from keras.models import Model
from tensorflow.keras import initializers
def ToyNet(nb_classes=3, img_dim=(2,)):
    """Build a small dense Keras model: input -> 125 -> 125 -> nb_classes.

    Args:
        nb_classes: number of output units (raw logits, no softmax here).
        img_dim: shape of a single input sample.

    Returns:
        An uncompiled Keras ``Model`` named "DenseNet".
    """
    model_input = Input(shape=img_dim)
    x = Dense(125, activation='relu')(model_input)
    x = Dense(125, activation='relu')(x)
    # No activation on the last layer: the loss applies softmax itself.
    x = Dense(nb_classes)(x)
    toyNet = Model(inputs=[model_input], outputs=[x], name="DenseNet")
    return toyNet
def fn_minus(correct, predicted):
    """Custom per-sample loss: softmax cross-entropy minus a confidence term.

    ``y_max`` is (max target prob - 0.5): +0.5 for one-hot in-domain labels
    and ~ -0.17 for the uniform [0.33, 0.33, 0.33] out-of-domain labels, so
    the mean-sigmoid term is rewarded in-domain and penalized out-of-domain.

    Returns a tensor of shape (batch,) — Keras averages it itself.
    """
    y_max = (tf.reduce_max(correct, axis=1) - 0.5)  # ---> only change here
    y_sgm = tf.nn.sigmoid(predicted)
    return tf.nn.softmax_cross_entropy_with_logits(labels=correct, logits=predicted) \
        - y_max * tf.reduce_mean(y_sgm, axis=1)
# Build and compile the Keras model with the custom loss.
model_minus = ToyNet()  # was "Toy Net()" in the paste — invalid syntax
model_minus.summary()

opt = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model_minus.compile(loss=fn_minus, optimizer=opt, metrics=["accuracy"])
print("Finished compiling")

####################
# Network training
####################
print("Fitting the model … ")
# batch_size=1200 covers the whole dataset, so each epoch is exactly one
# gradient step — matching one iteration of the PyTorch loop below.
model_minus.fit(x_train, y_train, batch_size=1200, epochs=5, verbose=1)
print("Done training …model_minus ")
pytorch code:
import random

import torch
from torch import nn

# Fix the RNG seeds so weight initialization is reproducible across runs.
_SEED = 9988
torch.manual_seed(_SEED)
random.seed(_SEED)
class torch_ToyNet(nn.Module):
    """PyTorch counterpart of the Keras ToyNet: in_features -> 125 -> 125 -> nb_classes.

    Outputs raw logits (no softmax), matching the Keras model.

    NOTE(review): nn.Linear's default init differs from Keras Dense's
    glorot_uniform, so identical seeds alone will not give identical starting
    weights — copy the Keras weights across (see below) to compare runs.
    """

    def __init__(self, in_features=2, nb_classes=3):
        super(torch_ToyNet, self).__init__()
        self.fc1 = nn.Linear(in_features, 125)
        self.fc2 = nn.Linear(125, 125)
        self.fc3 = nn.Linear(125, nb_classes)

    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        return self.fc3(x)  # logits
def torch_fn_minus(correct, predicted):
    """PyTorch port of the Keras fn_minus loss; returns per-sample losses.

    ``correct`` holds target probabilities (soft labels), so
    ``cross_entropy`` is called with probabilistic targets (torch >= 1.10)
    and ``reduction='none'`` to mirror TF's per-sample output.
    """
    y_max = torch.max(correct, dim=1).values - 0.5
    y_sgm = torch.sigmoid(predicted)
    # The pasted line had a double comma (",,") — a syntax error — and mixed
    # NumPy's "axis" kwarg with torch's "dim"; both are fixed here.
    ce = nn.functional.cross_entropy(predicted, correct, reduction='none')
    return ce - y_max * torch.mean(y_sgm, dim=1)
model = torch_ToyNet()
optim = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, nesterov=True)

# Full-batch training: one optimizer step per epoch, matching the Keras
# fit(batch_size=1200) call above.
# NOTE(review): y_train is float64 from NumPy while pred_y is float32 —
# consider torch.from_numpy(y_train).float() for a consistent dtype; kept
# as-is to preserve the reported numbers.
for epoch in range(5):
    pred_y = model(torch.from_numpy(x_train).float())
    loss = torch_fn_minus(torch.from_numpy(y_train), pred_y).mean()
    print(epoch, loss.item())
    # (The original also had "if epoch % 100 == 99: print(...)" — unreachable
    # with only 5 epochs, so it is dropped.)
    optim.zero_grad()
    loss.backward()
    optim.step()
To generate the dataset:
import numpy as np
import matplotlib.pyplot as plt
import os
# Make Python hashing and NumPy's RNG deterministic across runs, so the
# generated dataset is identical for the Keras and PyTorch experiments.
os.environ["PYTHONHASHSEED"] = "0"
np.random.seed(9988)
def get_data():
    """Generate and plot the toy 2-D dataset.

    Returns:
        dataX: (1200, 2) array — 600 in-domain points (three Gaussian
               clusters of 200 each) followed by 600 uniformly-sampled
               out-of-domain points.
        dataY: (1200, 3) array — one-hot targets for the clusters,
               [0.33, 0.33, 0.33] for the out-of-domain points.
    """
    d = 300
    cov = [[4, 0], [0, 4]]

    # Three Gaussian clusters with one-hot labels.
    mean1 = [-4, 0]
    x1 = np.random.multivariate_normal(mean1, cov, d)
    y1 = np.zeros([d, 3])
    y1[:, 0] = 1
    plt.plot(x1[:, 0], x1[:, 1], 'bx')

    mean2 = [4, 0]
    x2 = np.random.multivariate_normal(mean2, cov, d)
    y2 = np.zeros([d, 3])
    y2[:, 1] = 1
    plt.plot(x2[:, 0], x2[:, 1], 'yx')

    mean3 = [0, 5]
    x3 = np.random.multivariate_normal(mean3, cov, d)
    y3 = np.zeros([d, 3])
    y3[:, 2] = 1
    plt.plot(x3[:, 0], x3[:, 1], 'rx')

    # Keep the first h points of each cluster as in-domain training data.
    h = 200
    dataX = np.concatenate((x1[:h, :], x2[:h, :], x3[:h, :]), axis=0)
    dataY = np.concatenate((y1[:h, :], y2[:h, :], y3[:h, :]), axis=0)

    dist_val = 6.5

    def euclid_dist(i, j, mu):
        # Euclidean distance from point (i, j) to cluster centre mu.
        dist = np.sqrt((i - mu[0]) * (i - mu[0]) + (j - mu[1]) * (j - mu[1]))
        return dist

    # Rejection-sample 600 uniform points far from every cluster centre and
    # label them with the uniform "don't know" distribution.
    count = 0
    while count < 600:
        i = np.random.uniform(-15, 15)
        j = np.random.uniform(-13, 17)
        d1 = euclid_dist(i, j, mean1)
        d2 = euclid_dist(i, j, mean2)
        d3 = euclid_dist(i, j, mean3)
        if d1 > dist_val and d2 > dist_val and d3 > dist_val:
            dataX = np.concatenate((dataX, [[i, j]]), axis=0)
            dataY = np.concatenate((dataY, [[0.33, 0.33, 0.33]]), axis=0)
            count += 1

    plt.plot(dataX[h * 3:, 0], dataX[h * 3:, 1], 'k,')
    # Dummy single-point plots that widen the axes extents.
    plt.plot(-20, 20)
    plt.plot(20, -20)
    plt.plot(-20, -20)
    plt.plot(20, 20)
    plt.xlabel("x")
    plt.ylabel("y")
    return dataX, dataY
######################### get data and visualize the in-domain data points ##############
# Builds 600 in-domain points (3 Gaussian clusters) plus 600 uniform
# out-of-domain points labelled [0.33, 0.33, 0.33]; also draws the scatter plot.
x_train, y_train = get_data()
To give both models the same initialization, I have also tried this snippet:
# Re-create both networks and copy the Keras weights into the PyTorch model so
# the two start from an identical initialization.
model_minus = ToyNet()
model = torch_ToyNet()

weights = model_minus.get_weights()  # [W1, b1, W2, b2, W3, b3]
with torch.no_grad():
    for layer, idx in ((model.fc1, 0), (model.fc2, 2), (model.fc3, 4)):
        # Keras stores Dense kernels as (in, out); nn.Linear expects (out, in).
        # ascontiguousarray avoids handing torch a non-contiguous transposed
        # view, and copy_ under no_grad replaces the unsafe ".data =" pattern.
        layer.weight.copy_(torch.from_numpy(np.ascontiguousarray(weights[idx].T)))
        layer.bias.copy_(torch.from_numpy(weights[idx + 1]))
But the results still don't match exactly, even after the 4th epoch. Please help me figure out what I am doing wrong.
keras result:
Epoch 1/5
1/1 [==============================] - 0s 225ms/step - loss: 1.2290 - accuracy: 0.3342
Epoch 2/5
1/1 [==============================] - 0s 3ms/step - loss: 1.1335 - accuracy: 0.3242
Epoch 3/5
1/1 [==============================] - 0s 4ms/step - loss: 1.0568 - accuracy: 0.3450
Epoch 4/5
1/1 [==============================] - 0s 6ms/step - loss: 1.0024 - accuracy: 0.4467
Epoch 5/5
1/1 [==============================] - 0s 4ms/step - loss: 0.9697 - accuracy: 0.5575
torch result (loss):
0 1.2290266651050497
1 1.1339280305054038
2 1.057344033067301
3 1.0029294972211125
4 0.9699580834043524