Hi,
I’m trying to use a ResNet-Model for regression. While training the network I encounter the following error: RuntimeError: running_mean should contain 13140 elements not 64
The input and target are tensors with 26280 entries:
X_train: tensor([[29.3351, 29.1390, 29.8465, …, 31.1812, 33.4491, 34.2976]])
y_train: tensor([[14.6676, 14.5695, 14.9232, …, 15.1755, 15.5906, 16.7245]])
Changing the input channels in bn1 and conv1 was not successful. This is my entire code I’m using:
import os
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import torch
import torch.nn as nn
from math import ceil
import torch.optim as optim
import numpy as np
###get energy consumption
csv_simulated_path = "/content/drive/MyDrive/Colab Notebooks/BA6_SimulatedEnergyConsumption12.CSV"
csv_measured_path = "/content/drive/MyDrive/Colab Notebooks/BA6_SyntheticEnergyConsumption12.CSV"
df_simulated = pd.read_csv(csv_simulated_path)
df_measured = pd.read_csv(csv_measured_path)
# creating DataFrame from measured_data
y = df_measured[['synthetic heat consumption']].values
targets_df = pd.DataFrame(data=y)
targets_df.columns = ['synthetic heat consumption']
# creating tensor from targets_df
torch_tensor = torch.tensor(targets_df['synthetic heat consumption'].values)
df_simulated['Date Time'] = pd.to_datetime(df_simulated['Date Time'])
###print(df['Date Time'].dtype)
#set datetime as index
df_simulated = df_simulated.set_index(df_simulated['Date Time'])
#drop datetime column
df_simulated.drop('Date Time', axis=1, inplace=True)
#data extraction
X = df_simulated.values
y = df_measured[['synthetic heat consumption']].values
split = ceil(len(df_simulated) * 1)
X_train01 = X[:split]
y_train01 = y[:split]
X_test = X[split:]
y_test = y[split:]
###ResNet model
class block(nn.Module):
def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
super(block, self).__init__()
self.expansion = 4
self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
self.bn1 = nn.BatchNorm1d(out_channels)
self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
self.bn2 = nn.BatchNorm1d(out_channels)
self.conv3 = nn.Conv1d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
self.bn3 = nn.BatchNorm1d(out_channels*self.expansion)
self.relu = nn.ReLU()
self.identity_downsample = identity_downsample
def forward(self,x):
identity = x
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.conv3(x)
x = self.bn3(x)
if self.identity_downsample is not None:
identity = self.identity_downsample(identity)
x += identity
x = self.relu(x)
return x
class ResNet(nn.Module):
def __init__(self, block, layers, image_channels, num_classes):
super(ResNet, self).__init__()
self.in_channels = 64
self.conv1 = nn.Conv1d(image_channels, 64, kernel_size=7, stride=2, padding=3) #initial layer
self.bn1 = nn.BatchNorm1d(64) #initial layer
self.relu = nn.ReLU() #initial layer
self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) #initial layer
#ResNet layers
self.layer1 = self._make_layer(block, layers[0], out_channels=64, stride=1)
self.layer2 = self._make_layer(block, layers[1], out_channels=128, stride=2)
self.layer3 = self._make_layer(block, layers[2], out_channels=256, stride=2)
self.layer4 = self._make_layer(block, layers[3], out_channels=512, stride=2)
self.avgpool = nn.AdaptiveAvgPool1d((1))
self.fc = nn.Linear(512*4, num_classes)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.reshape(x.shape[0], -1)
x = self.fc(x)
return x
def _make_layer(self, block, num_residual_blocks, out_channels, stride):
identity_downsample = None
layers = []
if stride != 1 or self.in_channels != out_channels*4:
identity_downsample = nn.Sequential(nn.Conv1d(self.in_channels, out_channels*4, kernel_size=1, stride=stride), nn.BatchNorm1d(out_channels*4))
layers.append(block(self.in_channels, out_channels, identity_downsample, stride))
self.in_channels = out_channels*4
for i in range(num_residual_blocks - 1):
layers.append(block(self.in_channels, out_channels)) # outchannels: 256
return nn.Sequential(*layers)
def ResNetCAL(img_channels=1, num_classes=1):
return ResNet(block, [3,4,6,3], img_channels, num_classes)
###training method
#hyperparameters
learning_rate = 0.001
num_epochs = 12
input_size = 1
num_classes = 1
#initialize ResNet
model = ResNetCAL()
#loss and optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_func = nn.MSELoss()
#train network
#prepare input data
X_train01 = X_train01.astype(np.float32)
y_train01 = y_train01.astype(np.float32)
X_train01 = torch.from_numpy(X_train01)
y_train01 = torch.from_numpy(y_train01)
import matplotlib.pyplot as plt
plt.plot(X_train01)
plt.show()
X_train = X_train01.T
y_train = y_train01.T
print('X_train:', X_train, 'y_train:', y_train)
inputs = X_train
print('inputs:',inputs.type)
outputs = y_train
model = model
for epoch in range(num_epochs):
print('epoch:', epoch)
prediction = model(inputs)
loss = loss_func(prediction, outputs)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if epoch % 10 == 0:
# plot and show learning process
plt.plot(X_train01)
plt.plot(y_train01)
prediction = model(inputs)
plt.plot(prediction.data.numpy(), 'r-', lw=2)
plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 10, 'color': 'red'})
plt.pause(0.1)
plt.show()