So, I had ChatGPT help me stumble through this unfamiliar world, where I found a problem but had my post deleted on StackOverflow, but I am now tackling a bigger thing that I want to make more. ChatGPT showed me the methods and I followed them to avoid the documentation. However, there is an error with the text input! I am making a Pixel Art Generation AI that I call Pixel Putter, which makes 8x8 pixel art images. I’m starting small with a size of 8 training data things. But loading up the text training data has proved to not work with the current version, and 1.7.0 is missing from the install list PIP supplies. Could I have help updating this code?
import torch as t
import torchvision as tv
import torchtext as tt
import torch.nn as nn
import torch.optim as toptim
import torch.utils.data as tdata
import numpy as np
from torch.utils.data import Dataset
import os
print(t.__version__)
# Set the directory to the current directory
os.chdir(os.path.dirname(os.path.realpath(__file__)))
# Create a transform that normalizes the pixel values to a range between 0 and 1
normalize_transform = tv.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
# Define a transform that applies random horizontal flips, random crops, and color jittering to the images
data_augmentation_transform = tv.transforms.Compose([
tv.transforms.RandomHorizontalFlip(),
tv.transforms.RandomCrop(size=8, padding=2),
tv.transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1)
])
print(os.getcwd())
if os.path.exists('C:\\Users\\----\\Documents\\Python\\AI\\ChatGPT\'s Beginner Suggestion\\Size 8\\Dataset\\Images\\') and os.path.exists('C:\\Users\\----\\Documents\\Python\\AI\\ChatGPT\'s Beginner Suggestion\\Size 8\\Dataset\\Text\\'):
print("Pathcheck Complete.")
else:
print("Pathcheck Failed.")
# Apply the data augmentation transform to the training images
image_dataset = tv.datasets.ImageFolder(
'C:\\Users\\----\\Documents\\Python\\AI\\ChatGPT\'s Beginner Suggestion\\Size 8\\Dataset\\Images\\',
transform=tv.transforms.Compose([
data_augmentation_transform,
tv.transforms.ToTensor(),
normalize_transform
])
)
# THIS IS WHERE THE PROBLEM IS: # THIS IS WHERE THE PROBLEM IS: # THIS IS WHERE THE PROBLEM IS:
# THIS IS WHERE THE PROBLEM IS: # THIS IS WHERE THE PROBLEM IS: # THIS IS WHERE THE PROBLEM IS:
max_length = 32
# Load the text prompts
text_dataset = tt.datasets.TextFolder(
'C:\\Users\\----\\Documents\\Python\\AI\\ChatGPT\'s Beginner Suggestion\\Size 8\\Dataset\\Text\\',
tt.data.Field(sequential=True, fix_length=max_length)
)
# Build the vocabulary for the text prompts, including all words and initializing out-of-vocabulary words with the mean value of the vocabulary
text_dataset.fields['prompt'].build_vocab(text_dataset, min_freq=1, unk_init=t.Tensor.normal_)
# Pad the text prompts to a consistent length
text_dataset = tt.datasets.TextFolder(
'/Dataset/Text',
tt.data.Field(sequential=True, fix_length=max_length)
)
# Split the data into training and validation sets
train_size = int(0.8 * len(image_dataset))
val_size = len(image_dataset) - train_size
train_dataset, val_dataset = tdata.random_split(image_dataset, [train_size, val_size])
# Create a dataloader for the training data
batch_size = 4
train_dataloader = tdata.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Create a dataloader for the validation data
val_dataloader = tdata.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
class PixelArtGenerator(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
self.conv5 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
self.conv6 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
self.conv7 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv8 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv9 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv10 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv11 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv12 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv13 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv14 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv15 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
self.conv16 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
model = PixelArtGenerator()
loss_fn = nn.MSELoss()
optimizer = toptim.Adam(model.parameters())
# Set the model to training mode
model.train()
# Loop over the training data
for text, image in train_dataloader:
# Clear the gradients
optimizer.zero_grad()
# Forward pass
output = model(text)
# Compute the loss
loss = loss_fn(output, image)
# Backward pass
loss.backward()
# Update the model weights
optimizer.step()
# Set the model to evaluation mode
model.eval()
# Initialize a list to store the MSE values
mse_values = []
# Loop over the validation data
for text, image in val_dataloader:
# Forward pass
output = model(text)
# Compute the MSE
mse = loss_fn(output, image).item()
# Add the MSE to the list
mse_values.append(mse)
# Compute the mean MSE
mean_mse = sum(mse_values) / len(mse_values)
t.save(model.state_dict(), 'pixput.ai')