Dear experts,
I revised the file reading code of a custom dataset of CNN model.
Before
using a lot of .sgy files as ‘inputs’ and one .csv file which includes input file’s names and their corresponding ‘labels’
class cvspanel_dataset(Dataset):
def __init__(self, csv_file, root_dir, transform=None):
self.dv_label = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.dv_label)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
# data import
data_name = os.path.join(self.root_dir,
self.dv_label.iloc[idx, 0])
gth = segyio.open(data_name, ignore_geometry=True)
data = gth.trace.raw[:]
data = torch.tensor(data[:, :200])
# label import
arr = self.dv_label.iloc[idx, 1:]
arr = np.asarray(arr)
label = arr.astype('float').reshape(-1, 2)
label = np.nan_to_num(label)
label = torch.tensor(label)
label = label.view([-1, 1])
if self.transform:
data = self.transform(data)
if self.transform:
label = self.transform(label)
return data, label
train_dataset = cvspanel_dataset(csv_file='/content/drive/MyDrive/Colab Notebooks/share/data/v_int-z_labels/data40train32.csv',
root_dir='/content/drive/MyDrive/Colab Notebooks/share/data/cvspanels/train32',
transform=None)
test_dataset = cvspanel_dataset(csv_file='/content/drive/MyDrive/Colab Notebooks/share/data/v_int-z_labels/data40test8.csv',
root_dir='/content/drive/MyDrive/Colab Notebooks/share/data/cvspanels/test8',
transform=None)
After
using a lot of .sgy files as ‘inputs’ and a lot of .txt files as ‘labels’
class cvspanel_dataset(Dataset):
def __init__(self, label_dir, data_dir, transform=None):
self.label_dir = label_dir
self.data_dir = data_dir
self.transform = transform
def __len__(self):
return len(self.label_dir)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
# data import
filenames = os.listdir(self.label_dir)
# label_t-v0000.txt, ..., label_t-v4999.txt
numbers = [os.path.splitext(os.path.basename(filename))[0].split('v')[-1] for filename in filenames]
# 0000, ..., 0049, 50, ..., 500, ..., 4999
data_names = []
for number in numbers:
data_name = f"cvspanel{number}.sgy"
data_names.append(data_name)
# cvspanel0000.sgy, ..., cvspanel4999.sgy
data_name = data_names[idx]
data_path = os.path.join(self.data_dir, data_name)
# '/content/.../cvspanels_sgy/cvspanel????.sgy'
gth = segyio.open(data_path, ignore_geometry=True)
data = gth.trace.raw[:]
data = torch.tensor(data[:, :200])
# label import
label_names = os.listdir(self.label_dir)
# label_t-v0000.txt, ..., label_t-v4999.txt
label_name = label_names[idx]
label_name = os.path.join(label_dir, label_name)
# '/content/.../label_vsta_t/label_t-v????.txt'
prelabel = np.loadtxt(label_name)
label = np.zeros((20, 2))
label[:prelabel.shape[0], :] = prelabel
label = label.reshape(1, -1)
if self.transform:
data = self.transform(data)
if self.transform:
label = self.transform(label)
return data, label
data_dir = '/content/drive/MyDrive/Colab Notebooks/research_data/cvspanels_sgy'
label_dir = '/content/drive/MyDrive/Colab Notebooks/research_data/label_vsta_t'
train_dataset = cvspanel_dataset(label_dir=label_dir,
data_dir=data_dir,
transform=None)
test_dataset = cvspanel_dataset(label_dir=label_dir,
data_dir=data_dir,
transform=None)
Then the error said that it can’t find the file of:
gth = segyio.open(data_path, ignore_geometry=True)
Error message
FileNotFoundError Traceback (most recent call last)
in <cell line: 181>()
181 for t in range(epochs):
182 print(f"Epoch {t+1}\n-------------------------------")
→ 183 train_loop(train_dataloader, model, loss_fn, optimizer)
184 test_loop(test_dataloader, model, loss_fn)
185 print(“Done!”)6 frames
/usr/local/lib/python3.10/dist-packages/segyio/open.py in open(filename, mode, iline, xline, strict, ignore_geometry, endian)
160
161 from . import _segyio
→ 162 fd = _segyio.segyiofd(str(filename), mode, endians[endian])
163 fd.segyopen()
164 metrics = fd.metrics()FileNotFoundError: [Errno 2] No such file or directory
train_loop
def train_loop(train_dataloader, model, loss_fn, optimizer):
global record_train # Declare record as a global variable
size = len(train_dataloader.dataset)
model.train()
for batch, (data, label) in enumerate(train_dataloader):
data, label = data.float().to(device), label.float().to(device)
# Compute prediction and loss
pred = model(data.unsqueeze(1).repeat(batch_size, 1, 1, 1))
loss = loss_fn(pred, label.squeeze().repeat(batch_size, 1))
# Backpropagation
loss.backward()
optimizer.step()
optimizer.zero_grad()
if batch % 100 == 0:
loss, current = loss.item(), (batch + 1) * len(data)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
record_train = np.vstack((record_train, loss)) # Append the loss to the record array
epochs = 100
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train_loop(train_dataloader, model, loss_fn, optimizer)
test_loop(test_dataloader, model, loss_fn)
print("Done!")
The revised code of data import worked by itself, but the problem occurs when it is activated in the training code.
# data import -> class custom dataset
import os
import os.path
import segyio
import torch
data_dir = '/content/drive/MyDrive/Colab Notebooks/research_data/cvspanels_sgy'
label_dir = '/content/drive/MyDrive/Colab Notebooks/research_data/label_vsta_t'
order = 0
filenames = os.listdir(label_dir)
numbers = [os.path.splitext(os.path.basename(filename))[0].split('v')[-1] for filename in filenames]
data_names = []
for number in numbers:
data_name = f"cvspanel{number}.sgy"
data_names.append(data_name)
data_name = data_names[order]
data_path = os.path.join(data_dir, data_name)
gth = segyio.open(data_path, ignore_geometry=True)
data = gth.trace.raw[:]
data = torch.tensor(data[:, :200])
print(data)
print(data.shape)
tensor([[0.0000, 0.0000, 1.0000, …, 0.0634, 0.0393, 0.0248],
[0.0000, 0.0000, 1.0000, …, 0.0220, 0.0234, 0.0362],
[0.0000, 0.0000, 1.0000, …, 0.0474, 0.0230, 0.0475],
…,
[0.7780, 0.5869, 0.1883, …, 0.0050, 0.0022, 0.0028],
[0.7809, 0.5821, 0.1827, …, 0.0052, 0.0019, 0.0028],
[0.7838, 0.5773, 0.1776, …, 0.0054, 0.0016, 0.0028]])
torch.Size([100, 200])
Could you help me how can I solve the problem?
Thank you in advance for sparing your time to read my problem.
I appreciate any comments.