Very high loss that changes only slightly

technobusfahrer · May 25, 2021, 7:10pm

Hello PyTorch-Community,

I am new to deep learning and am currently taking a class on the topic in college. For this class, we (a classmate and I) are supposed to find a topic that interests us and apply deep learning to it.
We are trying to build a neural net that can forecast the price of various cryptocurrencies based on their previous values, the number of trades, etc.

We have managed to set it up, and the program calculates an epoch loss, but the loss always stays the same or changes only very slightly. We think the issue is that the value of cryptocurrencies is very hard to forecast from our data, but maybe we have missed something?

I have included the code below. Thanks a lot in advance for any help!

run.py (the main script):

from CryptoPredicter.util.pred_data import PredicterDataset

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import custom_lstm as lstm
import matplotlib.pyplot as plt


# Hyper-parameters of the experiment.
seq_size = 100
batch_size = 100

# Build the dataset; the keyword arguments are described in the
# PredicterDataset docstring.
data = PredicterDataset(
    seq_size,
    interval="Hourly",
    refresh=False,
    data_to_consider=100,
    coin_name='ETH',
)
# The number of input features is the second dimension of a sample's x tensor.
input_size = data[0][0].shape[1]
# Batches are served in dataset order (no shuffle argument is passed here).
loader = DataLoader(dataset=data, batch_size=batch_size)

# NOTE(review): OurNet is neither defined nor imported in the visible part of
# this script -- confirm where it comes from.
net = OurNet(input_size)

criterion = nn.MSELoss()
# NOTE(review): lr=0.5 looks large for plain SGD on a regression target --
# worth confirming against the scale of the labels.
optimizer = optim.SGD(net.parameters(), lr=0.5)
# optimizer = optim.LBFGS(net.parameters(), lr=0.01)
epoch_loss_list = []

for epoch in range(50):

    # Per-batch losses of this epoch; their mean is the reported epoch loss.
    batch_losses = []

    for inputs, labels in loader:
        optimizer.zero_grad()
        outputs = net(inputs.float())
        # Drop singleton dimensions so the prediction lines up with the labels.
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    running_loss = sum(batch_losses)
    epoch_loss = running_loss / len(loader)
    epoch_loss_list.append(epoch_loss)

    print('Epoch loss: ' + str(epoch_loss))

OurNet.py (where we instantiate our custom LSTM):

import torch
import torch.nn as nn
import math

class CustomLSTM(nn.Module):
    """Single-layer LSTM written out gate by gate.

    PyTorch's built-in ``nn.LSTM`` could be used instead.

    Each gate ``k`` in ``{f, i, o, g}`` (forget, input, output, candidate)
    owns three parameters:

    * ``U_k`` -- input weights, shape ``(input_sz, hidden_sz)``
    * ``V_k`` -- recurrent weights, shape ``(hidden_sz, hidden_sz)``
    * ``b_k`` -- bias, shape ``(hidden_sz,)``
    """

    def __init__(self, input_sz: int, hidden_sz: int):
        """Create the gate parameters and initialise them.

        Args:
            input_sz: Number of features per time step.
            hidden_sz: Size of the hidden and cell state.
        """
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz

        # Forget gate (U_* depends on the input size).
        self.U_f = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_f = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_f = nn.Parameter(torch.empty(hidden_sz))

        # Input gate.
        self.U_i = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_i = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_i = nn.Parameter(torch.empty(hidden_sz))

        # Output gate.
        self.U_o = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_o = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_o = nn.Parameter(torch.empty(hidden_sz))

        # Candidate cell state.
        self.U_g = nn.Parameter(torch.empty(input_sz, hidden_sz))
        self.V_g = nn.Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_g = nn.Parameter(torch.empty(hidden_sz))

        self.init_weights()

    def init_weights(self):
        """Draw every parameter from U(-1/sqrt(hidden_size), 1/sqrt(hidden_size))."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            nn.init.uniform_(weight, -stdv, stdv)

    def forward(self, x):
        """Run the LSTM over a batch of sequences.

        Args:
            x: Tensor whose shape is unpacked as
                ``(batch_size, sequence_size, input_size)``.

        Returns:
            Tuple ``(h_t, hidden_seq)``: the hidden state after the last time
            step, shape ``(batch_size, hidden_size)``, and the hidden states
            of all time steps, shape
            ``(batch_size, sequence_size, hidden_size)``.
        """
        bs, seq_sz, _ = x.shape

        # The initial cell and hidden state must live on the same device and
        # have the same dtype as the input; a plain torch.zeros(...) would
        # break for float64 models or models moved to another device.
        c_t = x.new_zeros(bs, self.hidden_size)
        h_t = x.new_zeros(bs, self.hidden_size)

        hidden_seq = []
        for t in range(seq_sz):
            x_t = x[:, t, :]
            # "@" is matrix multiplication.
            f_t = torch.sigmoid(x_t @ self.U_f + h_t @ self.V_f + self.b_f)
            i_t = torch.sigmoid(x_t @ self.U_i + h_t @ self.V_i + self.b_i)
            o_t = torch.sigmoid(x_t @ self.U_o + h_t @ self.V_o + self.b_o)
            g_t = torch.tanh(x_t @ self.U_g + h_t @ self.V_g + self.b_g)
            c_t = f_t * c_t + i_t * g_t
            h_t = o_t * torch.tanh(c_t)
            hidden_seq.append(h_t)

        if hidden_seq:
            # Stack along a new time axis: (batch_size, sequence_size, hidden_size).
            hidden_seq = torch.stack(hidden_seq, dim=1)
        else:
            # Zero-length sequence: nothing to stack, return an empty time axis.
            hidden_seq = x.new_zeros(bs, 0, self.hidden_size)
        return h_t, hidden_seq

downloader.py (we use it to download the currency data):

import json
import os
import pathlib
import pickle
import shutil
import ssl
from os import listdir

import numpy as np
import pandas as pd
from urllib import request

from bs4 import BeautifulSoup
from sklearn import preprocessing

# Remote endpoints (the real values were withheld in this listing).
base_url = 'available upon request'
download_base_url = 'available upon request'

# <directory of this module>/data/ holds one sub-directory per interval with
# the downloaded files; <...>/data/Samples/ holds the generated sample pickles.
path = f"{pathlib.Path(__file__).parent.absolute()}{os.sep}data{os.sep}"
samples_path = f"{path}Samples{os.sep}"

# Number of milliseconds in one day (24 h * 60 min * 60 s * 1000 ms).
mil_seconds_of_one_day = 24 * 60 * 60 * 1000

# Browser-like request headers. Not referenced by the code visible in this file.
MOZILLA_HEADER = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/90.0.4430.85 Safari/537.36'
    ),
    'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
    'Accept-Encoding': 'gzip, deflate, br',
}

# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate checks are switched off for every HTTPS
# request made through urllib in this process -- confirm this is acceptable.
ssl._create_default_https_context = ssl._create_unverified_context


def prepare_data(interval, refresh, data_to_consider):
    """Download the raw coin files if needed and build one sample file per timestamp.

    For every considered timestamp a DataFrame with one row per coin
    (columns: name, open, high, low, close, Volume, Volume USDT, tradecount)
    is min-max scaled and pickled to
    ``samples_path + interval + os.sep + <timestamp // 1000>.pkl``.

    Args:
        interval: Name of the data interval; also the sub-directory name.
        refresh: Forwarded to ``download`` and ``check_if_samples_exist``;
            when true the samples are rebuilt even if they already exist.
        data_to_consider: How many days before the newest timestamp to include.

    Raises:
        RuntimeError: If a coin file does not contain exactly one row for a
            considered timestamp.

    NOTE(review): the scaler is fitted separately for every timestamp on the
    rows of that timestamp (one row per coin), so each feature is scaled
    relative to the other coins at the same instant rather than over time --
    confirm this is the intended normalisation.
    NOTE(review): sample files left over from an earlier run are not deleted
    before new ones are written.
    """
    download(interval, refresh)

    raw_dir = path + interval + os.sep
    sample_dir = samples_path + interval
    # Create the sample directory on first use.
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)

    # One file per coin in the raw-data directory.
    all_coins = listdir(path + interval)
    # The first coin's file supplies the list of available timestamps.
    all_ts = get_newest_ts_and_all_ts(raw_dir + all_coins[0])
    newest_ts = max(all_ts)
    # Oldest timestamp (exclusive) that is still considered.
    min_timestamp = get_min_unix_timestamp(data_to_consider, newest_ts)
    ts_list = [ts for ts in all_ts if ts > min_timestamp]

    # The newest timestamp is skipped below, hence the "- 1".
    sample_count = len(ts_list) - 1
    print('Taking %s timestamps in consideration' % sample_count)
    if check_if_samples_exist(interval, sample_count, refresh):
        print('Sample data already exist')
        return

    # Parse every coin file once up front instead of once per timestamp.
    coin_frames = {}
    for coin in all_coins:
        with open(raw_dir + coin, 'r') as f:
            coin_frames[coin] = pd.read_csv(f, sep=',', header=1)

    count = 0
    print('Creating samples ...')
    for ts in ts_list:
        # Leave out the newest timestamp because there is no y data for it.
        if ts == newest_ts:
            continue
        count += 1
        print('... %s/%s -> %s' % (count, sample_count, str((count / sample_count) * 100)))

        dataf = pd.DataFrame(columns=['name', 'open', 'high', 'low', 'close', 'Volume', 'Volume USDT', 'tradecount'])
        for coin in all_coins:
            df = coin_frames[coin]
            # Columns from index 3 onwards are the numeric features.
            feature = df.loc[df['unix'] == ts].values[:, 3:]
            if not len(feature) == 1:
                raise RuntimeError('No or to much data found')
            dataf.loc[len(dataf)] = [coin] + feature[0].tolist()

        # Normalize every feature column to [0, 1] across the coins of this timestamp.
        min_max_scaler = preprocessing.MinMaxScaler()
        dataf.iloc[:, 1:] = min_max_scaler.fit_transform(dataf.iloc[:, 1:])

        with open(sample_dir + os.sep + str(int(ts / 1000)) + ".pkl", 'wb') as f:
            pickle.dump(dataf, f, pickle.HIGHEST_PROTOCOL)


def check_if_samples_exist(interval, ts, refresh):
    """Tell whether the sample directory already holds the expected samples.

    Args:
        interval: Name of the interval sub-directory below ``samples_path``.
        ts: Expected number of entries in that directory.
        refresh: When true, existing samples never count as valid.

    Returns:
        True only if the directory exists, contains exactly ``ts`` entries
        and ``refresh`` is false; otherwise False.
    """
    sample_dir = samples_path + interval
    if not os.path.exists(sample_dir):
        return False
    count_matches = len(listdir(sample_dir)) == ts
    return bool(count_matches and not refresh)


def get_newest_ts_and_all_ts(file):
    """Read the ``unix`` timestamp column of a coin CSV file.

    The first line of the file is skipped and the second line is used as the
    header row (``header=1``).

    Args:
        file: Path of the CSV file to read.

    Returns:
        The ``unix`` column as a pandas Series. Despite the function name,
        only this column is returned; callers derive the newest timestamp
        from it themselves.

    Raises:
        EOFError: If the file contains no data rows.
    """
    # Use a separate name for the handle so the path argument is not shadowed.
    with open(file, 'r') as handle:
        timestamps = pd.read_csv(handle, header=1)['unix']
    if timestamps.empty:
        raise EOFError("Could not find newest date. Please check your data.")
    return timestamps


def get_min_unix_timestamp(data_to_consider, newest_ts):
    """Return the timestamp lying ``data_to_consider`` days before ``newest_ts``.

    Args:
        data_to_consider: Number of days to go back.
        newest_ts: Reference timestamp, in milliseconds.

    Returns:
        ``newest_ts`` minus ``data_to_consider`` days expressed in milliseconds.
    """
    window_ms = mil_seconds_of_one_day * data_to_consider
    return newest_ts - window_ms


def download(interval, refresh):
    """Fetch all data files of ``interval`` into ``path + interval``.

    Nothing is downloaded when the directory already exists and ``refresh``
    is false. Otherwise an existing directory is removed, recreated and
    filled with one file per download link.

    Args:
        interval: Interval name; used as directory name and link filter.
        refresh: Force a fresh download even if data is already present.
    """
    target_dir = path + interval

    if os.path.exists(target_dir):
        if not refresh:
            # Data is present and no refresh was requested.
            print(interval + " - Data already exist")
            return
        # Refresh requested: drop the old data first.
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)

    # Collect the relative links of all files to download.
    urls = get_download_urls(interval)
    total = str(len(urls))

    print("Downloading %s data ..." % interval)
    for position, url in enumerate(urls, start=1):
        print("Downloading ... %s/%s" % (str(position), total))
        download_url = download_base_url + url
        # The local file is named after the coin extracted from the link.
        destination = target_dir + os.sep + get_name(download_url)
        request.urlretrieve(download_url, destination)
    print("... download done!")


def get_download_urls(interval):
    """Collect the download links for ``interval`` from the page at ``base_url``.

    A link is kept when its ``href`` contains "USDT" (case-insensitive), its
    link text contains ``interval`` and its ``href`` does not contain
    "futures" (case-insensitive).

    Args:
        interval: Interval name that must appear in the link text.

    Returns:
        List of ``href`` values of the matching links, in page order.
    """
    page = request.urlopen(base_url).read()
    soup = BeautifulSoup(page, 'html.parser')

    download_links = []
    # href=True skips anchors without an href attribute, which would
    # otherwise raise a KeyError on link['href'].
    for link in soup.find_all('a', href=True):
        href = link['href']
        href_upper = href.upper()
        if "USDT" in href_upper and interval in link.text and "FUTURES" not in href_upper:
            download_links.append(href)
    return download_links


def get_name(link):
    """Extract the coin name from a download link.

    The link is split at underscores; the second piece with its last four
    characters removed is returned, e.g. ``"Binance_ETHUSDT_1h.csv"`` gives
    ``"ETH"``.
    """
    pieces = link.split("_")
    trading_pair = pieces[1]
    suffix_length = 4
    return trading_pair[:-suffix_length]

pred_data.py (we use it to transform the data into a dataframe):

import json
import os
import pathlib
import pickle
import random
from datetime import datetime

import pandas as pd

from os import listdir

import torch
from torch.utils.data import Dataset

from CryptoPredicter.util.Downloader import prepare_data
from CryptoPredicter.util.interval_to_unix import int_to_unix

# <directory of this module>/data/ -- root of the downloaded coin files.
path_data = f"{pathlib.Path(__file__).parent.absolute()}{os.sep}data{os.sep}"
# <directory of this module>/data/Samples/ -- root of the pickled samples.
path_sample = f"{path_data}Samples{os.sep}"


class PredicterDataset(Dataset):
    '''
    Dataset that pairs every pickled sample file with the price change of one coin.

    seq_size = How many rows of a sample are delivered per item (the last seq_size rows are kept).
    interval = Daily, Hourly or Minute -> describes the granularity of the data.
    refresh = Set to "True" if data should be reloaded from the internet.
    data_to_consider = Defines how many days in the past should be considered.
    coin_name = The short name of the coin that should be predicted; expected values -> ['ADA', 'BNB', 'BTC', 'BTT', 'DASH', 'EOS', 'ETC', 'ETH', 'LINK', 'LTC', 'NEO', 'QTUM', 'TRX', 'XLM', 'XMR', 'XRP', 'ZEC']
    '''

    def __init__(self, seq_size, interval="Minute", refresh=False, data_to_consider=100, coin_name='BTC'):
        # Make sure raw data and sample files exist before listing them.
        prepare_data(interval, refresh, data_to_consider)
        self.seq_size = seq_size
        self.paths = listdir(path_sample + interval)
        self.interval = interval
        self.y_data = get_y_data(coin_name, interval)
        self.coin_name = coin_name
        # Sample files are visited in random order.
        random.shuffle(self.paths)

    def __len__(self):
        '''Number of sample files found for the interval.'''
        return len(self.paths)

    def __getitem__(self, idx):
        '''
        Return (x, y) for sample idx.

        x = tensor holding the last seq_size rows of the sample's feature columns.
        y = float taken from the 'diff' column of the label row whose 'unix' equals the sample's timestamp.

        If the sample at idx has no label, the next sample that has one is
        returned instead, wrapping around at the end of the list.

        Raises IndexError if idx is out of range or no sample has a label.
        '''
        total = len(self.paths)
        # Reject out-of-range indices explicitly; the wrap-around below would
        # otherwise silently map them onto valid samples.
        if not -total <= idx < total:
            raise IndexError('sample index out of range')

        # Probe each sample at most once, so the search cannot run past the
        # end of the list (the former "idx + 1" recursion could).
        for offset in range(total):
            file_name = self.paths[(idx + offset) % total]
            # File names are "<seconds>.pkl"; labels are keyed in milliseconds.
            timestamp = int(file_name[:-4]) * 1000
            labels = self.y_data.loc[self.y_data['unix'] == timestamp]['diff'].values
            if len(labels) == 0:
                continue
            y = float(labels[0])

            data_link = path_sample + os.sep + self.interval + os.sep + str(file_name)
            # NOTE(review): pickle.load must only be used on trusted files;
            # these are presumably the files written by prepare_data.
            with open(data_link, 'rb') as f:
                item = pickle.load(f)
            x = torch.tensor(item[['open', 'high', 'low', 'close', 'Volume', 'Volume USDT', 'tradecount']].values)
            # NOTE(review): a sample written by prepare_data has one row per
            # coin, so this keeps the last seq_size coins rather than the last
            # seq_size time steps -- confirm this is intended.
            x = x[len(x) - self.seq_size: len(x)]
            return x, y

        raise IndexError('no sample with a matching label found')


def get_y_data(coin_name, interval):
    """Build the label table for one coin.

    Reads ``path_data + interval + os.sep + coin_name`` as CSV (second line
    is the header), shifts the ``unix`` column back by
    ``int_to_unix[interval]`` and adds ``diff = close - open``.

    NOTE(review): ``int_to_unix[interval]`` is presumably the length of one
    interval in the unit of ``unix``, so that a row's timestamp points at the
    preceding candle -- confirm against ``int_to_unix``.

    Returns:
        DataFrame reduced to its first column and the ``diff`` column.
    """
    csv_path = path_data + interval + os.sep + coin_name
    with open(csv_path) as f:
        # Load the dataframe of the requested coin.
        df = pd.read_csv(f, sep=',', header=1)

    # Move every timestamp to t-1.
    df['unix'] = df['unix'] - int_to_unix[interval]
    # Price change within the candle.
    df['diff'] = df['close'] - df['open']
    # Everything between the first and the last column is no longer needed.
    obsolete_columns = df.columns[1:-1].values.tolist()
    return df.drop(columns=obsolete_columns)

Any help is kindly appreciated!