Prediction of Temperature

Mohamed_Abokahf · November 13, 2023, 5:56pm

import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np # read the data which will be used for training, validation and testing of Recurrent neural network model
df = pd.read_csv(‘/kaggle/input/temperature/train.csv’, header=None) print('The data consists of ', df.shape[0], 'rows and ', df.shape[1], ‘columns’) def Data_splitting(df):
# i am going to split the data 80% training, 10% validation and 10% testing
train_valid , test = train_test_split(df, test_size= 0.1, random_state=42, shuffle=True)

# split the train and valid data now to 80% and 10% 
train, valid = train_test_split(train_valid, test_size= 0.1, random_state=42, shuffle=True)

print('== Train has', round(len(train)/len(df) * 100, 2), '% of the data ==')
print('== Valid has', round(len(valid)/len(df) * 100, 2), '% of the data ==')
print('== Test has ', round(len(test)/len(df) * 100,2), '% of the data ==') 
return train, valid, test # get the indices for each temperature in both input and output

def get_indices(df):
# get the input and the output indices
cols = list(df.columns)
input_width = 90
label_width = 7
step = input_width + label_width
input_indices =
label_indices =
#print(len(cols)-step)
for i in range(0, len(cols), 1):
if i + step <= len(cols)-(step):
in_la = slice(i, i + step)
elif i + step == len(cols):
in_la = slice(i, i + step)

    inputs = [x for x in range(in_la.start, in_la.stop-7)]
    labels = [x for x in range(in_la.stop-7, in_la.stop)]
    input_indices.append(inputs)
    label_indices.append(labels)
    if i + step == len(cols):
        break
print(' == Now we have a list of indices for the input temperatures and the label temperatures == ')
return input_indices, label_indices def fill_df(x, y):
df_indices = pd.DataFrame(columns=['features indices', 'label indices'], index=[i for i in range(len(x))])
# filling the dataframe 
for i in range(len(df_indices)): 
    df_indices['features indices'][i] = x[i]
    df_indices['label indices'][i] = y[i]
print(" === The dataframe is filled with indices === ")
return df_indices def ready_data(df_indices, df):
for i in tqdm(range(len(df_indices))):
    lis_in = df_indices['features indices'][i]
    lis_la = df_indices['label indices'][i]
    for j in range(len(df)):
        tmp_in = [df[el][j] for el in lis_in]
        tmp_la = [df[el][j] for el in lis_la] + [0] * 83
        df_indices['features indices'][i] = tmp_in
        df_indices['label indices'][i] = tmp_la
print(" == The mapping has been finished == ")
return df_indices # implementation of the model

class MyModel(nn.Module):
def init(self, input_size, hidden_size, output_size, num_layers):
super(MyModel, self).init()
self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_size, output_size)

def forward(self, x):
    out, _ = self.rnn(x)
    out = self.fc(out)
    return out class MyDataSet(Dataset):
def __init__(self, tensor):
    self.tensor = tensor
    
def __len__(self):
    return len(self.tensor)

def __getitem__(self, idx):
    return self.tensor[idx] # Convert the data to Numpy array

def convert_to_array(df_indices):
# define the features as Numpy array
features = df_indices[‘features indices’].values
labels = df_indices[‘label indices’].values
sequences = zip(features, labels)
sequences = list(sequences)
sequences = torch.tensor(sequences)
print(" === Converted to Numpy arrays === “)
return sequences train, valid, test = Data_splitting(df) x_train, y_train = get_indices(train)
x_valid, y_valid = get_indices(valid)
x_test, y_test = get_indices(test) df_indices_train = fill_df(x_train, y_train)
df_indices_valid = fill_df(x_valid, y_valid)
df_indices_test = fill_df(x_test, y_test) res_train = ready_data(df_indices_train, df)
res_valid = ready_data(df_indices_valid, df)
res_test = ready_data(df_indices_test, df) resu_train = convert_to_array(res_train)
resu_valid = convert_to_array(res_valid)
resu_test = convert_to_array(res_test) flattend_train = resu_train.view(-1)
flattend_valid = resu_valid.view(-1)
flattend_test = resu_test.view(-1) data_train = MyDataSet(flattend_train)
data_valid = MyDataSet(flattend_valid)
data_test= MyDataSet(flattend_test) # Configuration
batch_size = 32
train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(data_valid, batch_size=batch_size)
print(” === Data are loaded === “) # model hyperparameters defining
input_size = 1
hidden_size = 64
output_size = 7
num_layers = 1
print(” === Hyperparameters are defined === “) # create an object from the model
model = MyModel(input_size, hidden_size, output_size, num_layers)
print(” === An instance of the model is created === ") # defining the loss function and optimizer

mean squared Error loss function

criteria = nn.MSELoss()

Adam’s optimizer with learning rate of 0.001

optimizer = optim.Adam(model.parameters(), lr=0.001)

print(" === The loss function and the optimizers are defined === ") # Training loop

num_epoch = 10
for epoch in range(num_epoch):
for batch in train_loader:
inputs, targets = batch
optimizer.zero_grad()
outputs = model(inputs)
loss = criteria(outputs, targets)
loss.backward()
optimizer.step()
print(f’Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}') ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[291], line 6
4 for epoch in range(num_epoch):
5 for batch in train_loader:
----> 6 inputs, targets = batch
7 optimizer.zero_grad()
8 outputs = model(inputs)

ValueError: too many values to unpack (expected 2)

Could any one tell me how to solve this problem. Thanks in advance

ptrblck · November 13, 2023, 6:52pm

You code is not properly formatted and thus hard to read, but based on the error message it seems batch does not contain two tensors and the unpacking thus fails. Print batch and make sure you can properly unpack it into two objects.

Mohamed_Abokahf · November 14, 2023, 6:04pm

Hallo thanks a lot for the response can you please have a look at this notebook :

it is working, but it is wrong. for each 90 temperatures I need the model to predict the next 7 temperatures, so I don’t know how to prepare the data before I feed the model with it. for example the input size I wrote now 90, but correct is 1 and the sequence length for the input is 90 and the output is temperature but I need 7 values so should it be 1 with seq length of 7 or what. Thanks a lot for your help