import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np # read the data which will be used for training, validation and testing of Recurrent neural network model
df = pd.read_csv(‘/kaggle/input/temperature/train.csv’, header=None) print('The data consists of ', df.shape[0], 'rows and ', df.shape[1], ‘columns’) def Data_splitting(df):
# i am going to split the data 80% training, 10% validation and 10% testing
train_valid , test = train_test_split(df, test_size= 0.1, random_state=42, shuffle=True)
# split the train and valid data now to 80% and 10%
train, valid = train_test_split(train_valid, test_size= 0.1, random_state=42, shuffle=True)
print('== Train has', round(len(train)/len(df) * 100, 2), '% of the data ==')
print('== Valid has', round(len(valid)/len(df) * 100, 2), '% of the data ==')
print('== Test has ', round(len(test)/len(df) * 100,2), '% of the data ==')
return train, valid, test # get the indices for each temperature in both input and output
def get_indices(df):
# get the input and the output indices
cols = list(df.columns)
input_width = 90
label_width = 7
step = input_width + label_width
input_indices =
label_indices =
#print(len(cols)-step)
for i in range(0, len(cols), 1):
if i + step <= len(cols)-(step):
in_la = slice(i, i + step)
elif i + step == len(cols):
in_la = slice(i, i + step)
inputs = [x for x in range(in_la.start, in_la.stop-7)]
labels = [x for x in range(in_la.stop-7, in_la.stop)]
input_indices.append(inputs)
label_indices.append(labels)
if i + step == len(cols):
break
print(' == Now we have a list of indices for the input temperatures and the label temperatures == ')
return input_indices, label_indices def fill_df(x, y):
df_indices = pd.DataFrame(columns=['features indices', 'label indices'], index=[i for i in range(len(x))])
# filling the dataframe
for i in range(len(df_indices)):
df_indices['features indices'][i] = x[i]
df_indices['label indices'][i] = y[i]
print(" === The dataframe is filled with indices === ")
return df_indices def ready_data(df_indices, df):
for i in tqdm(range(len(df_indices))):
lis_in = df_indices['features indices'][i]
lis_la = df_indices['label indices'][i]
for j in range(len(df)):
tmp_in = [df[el][j] for el in lis_in]
tmp_la = [df[el][j] for el in lis_la] + [0] * 83
df_indices['features indices'][i] = tmp_in
df_indices['label indices'][i] = tmp_la
print(" == The mapping has been finished == ")
return df_indices # implementation of the model
class MyModel(nn.Module):
def init(self, input_size, hidden_size, output_size, num_layers):
super(MyModel, self).init()
self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
out, _ = self.rnn(x)
out = self.fc(out)
return out class MyDataSet(Dataset):
def __init__(self, tensor):
self.tensor = tensor
def __len__(self):
return len(self.tensor)
def __getitem__(self, idx):
return self.tensor[idx] # Convert the data to Numpy array
def convert_to_array(df_indices):
# define the features as Numpy array
features = df_indices[‘features indices’].values
labels = df_indices[‘label indices’].values
sequences = zip(features, labels)
sequences = list(sequences)
sequences = torch.tensor(sequences)
print(" === Converted to Numpy arrays === “)
return sequences train, valid, test = Data_splitting(df) x_train, y_train = get_indices(train)
x_valid, y_valid = get_indices(valid)
x_test, y_test = get_indices(test) df_indices_train = fill_df(x_train, y_train)
df_indices_valid = fill_df(x_valid, y_valid)
df_indices_test = fill_df(x_test, y_test) res_train = ready_data(df_indices_train, df)
res_valid = ready_data(df_indices_valid, df)
res_test = ready_data(df_indices_test, df) resu_train = convert_to_array(res_train)
resu_valid = convert_to_array(res_valid)
resu_test = convert_to_array(res_test) flattend_train = resu_train.view(-1)
flattend_valid = resu_valid.view(-1)
flattend_test = resu_test.view(-1) data_train = MyDataSet(flattend_train)
data_valid = MyDataSet(flattend_valid)
data_test= MyDataSet(flattend_test) # Configuration
batch_size = 32
train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(data_valid, batch_size=batch_size)
print(” === Data are loaded === “) # model hyperparameters defining
input_size = 1
hidden_size = 64
output_size = 7
num_layers = 1
print(” === Hyperparameters are defined === “) # create an object from the model
model = MyModel(input_size, hidden_size, output_size, num_layers)
print(” === An instance of the model is created === ") # defining the loss function and optimizer
mean squared Error loss function
criteria = nn.MSELoss()
Adam’s optimizer with learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)
print(" === The loss function and the optimizers are defined === ") # Training loop
num_epoch = 10
for epoch in range(num_epoch):
for batch in train_loader:
inputs, targets = batch
optimizer.zero_grad()
outputs = model(inputs)
loss = criteria(outputs, targets)
loss.backward()
optimizer.step()
print(f’Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}') ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[291], line 6
4 for epoch in range(num_epoch):
5 for batch in train_loader:
----> 6 inputs, targets = batch
7 optimizer.zero_grad()
8 outputs = model(inputs)
ValueError: too many values to unpack (expected 2)
Could any one tell me how to solve this problem. Thanks in advance