Hi, I am trying to implement PCA using SVD in GPU. Following is my code. It doesn’t work when the number of records is greater than or equal to 100K. The number of features is 300 (100K by 300 matrix).
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
from datetime import datetime
def load_data(filepath, header, sep):
df = pd.read_csv(
filepath_or_buffer=filepath,
header=header,
sep=sep)
return df
def split_data(df, features_len):
first_col = df[df.columns[0]]
df[df.columns[0]] = first_col.apply(lambda x: x.split(',')[1])
features = df.ix[:,0:(features_len-2)]
return features
def get_minimum_features(s, retainedVariance):
var_percentage = (torch.cumsum(s, dim=0)/torch.sum(s))*100
_, index = torch.max(torch.gt(var_percentage, retainedVariance), 0)
return index
folder_path = '../pca/dataset/'
data_file = 'mat_200K_300F'
data_frame = load_data(folder_path + data_file, None, ' ')
features = split_data(data_frame, len(data_frame.columns))
normalized_features = StandardScaler().fit_transform(features)
U, s, V = torch.svd(torch.Tensor(normalized_features.T).cuda(), some=True)
k = get_minimum_features(s, 95)
U_reduced = U[:, : k[0]]
Z = torch.mm(torch.Tensor(normalized_features).cuda(), U_reduced.cuda())
The stack trace for when segmentation fault occurs is shown below.
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fff20c19b00 (LWP 21760)]
0x00007ffee15a2db0 in ?? ()
from /sabra/anaconda3/lib/python3.6/site-packages/numexpr/…/…/…/libmkl_avx2.so
Missing separate debuginfos, use: debuginfo-install glibc-2.17-196.el7.x86_64
(gdb) where
#0 0x00007ffee15a2db0 in ?? ()
from /sabra/anaconda3/lib/python3.6/site-packages/numexpr/…/…/…/libmkl_avx2.so
#1 0x0000000000000018 in ?? ()
#2 0x00007fff20c18870 in ?? ()
#3 0x0000000000000018 in ?? ()
#4 0x0000000000000000 in ?? ()
Based on the simple implementation below (2M by 300 matrix) that runs on GPU I think my code fails because a large amount of data is being transferred to and from CPU and GPU.
import torch
x = torch.zeros(2000000, 300).cuda();
u, s, v = torch.svd(x, some=True);
print(u);
Any help on understanding and fixing this issue is highly appreciated.