Error in Positional embedding layer

Hello all, I'm trying to implement a GPT model. As a first step, I'm trying to add a positional embedding to my input (token) embedding, but the error below occurs and I don't know how to resolve it. Can anyone please help me out?

Code:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GPT(nn.Module):
    def __init__(self, n_embd, n_head, vocab_size, max_seq_len=32, device="cpu"):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, n_embd).to(device)
        self.position_embedding = nn.Embedding(max_seq_len, n_embd, device=device)
        self.blocks = nn.Sequential(*(Block(n_embd, n_head, device) for _ in range(n_head))).to(device)
        self.ln = nn.LayerNorm(n_embd).to(device)
        self.fc = nn.Linear(n_embd, 64).to(device)

    def forward(self, idx, targets=None):
        B, T = idx.shape
        out = self.token_embedding(idx)  # (B, T, n_embd)
        positions = torch.arange(0, T, device=idx.device).unsqueeze(0).expand(B, -1)
        position_embedding = self.position_embedding(positions)  # (B, T, n_embd)
        out = out + position_embedding
        out = self.blocks(out)
        out = self.ln(out)
        logits = self.fc(out)
        print(f'Logits size: {logits.size()}')
        if targets is None:
            return logits, None
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
        return logits, loss

def generate(model, idx, start_char="F", max_new_tokens=100, top_p=0.9, top_k=50, temperature=1):
    model.eval()
    generated = [start_char]
    filter_value = -float('Inf')
    for _ in range(max_new_tokens):
        logits, _ = model(idx)
        # take the prediction for the last position and map the id back to a character
        next_char_id = torch.argmax(logits[:, -1, :]).item()
        next_char = [k for k, v in c2id.items() if v == next_char_id][0]
        generated.append(next_char)
    return generated

Example usage

vocab_size = 5000 # Example vocab size
embed_size = 64 # Example embedding size
max_length = 32 # Example max length

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT(embed_size, 4, vocab_size, device)

idx = idx

targets = idy
logits, loss = model(idx, targets)

print(f'Logits: {logits.size()}, Loss: {loss.item()}')

# Generating a word before training
generated_word = generate(model, idx)
print(f'Generated word before training: {generated_word}')

Error:
TypeError                                 Traceback (most recent call last)
in <cell line: 47>()
     45
     46 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
---> 47 model = GPT(embed_size, 4, vocab_size, device)
     48
     49 idx = idx

1 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/sparse.py in __init__(self, num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse, _weight, _freeze, device, dtype)
    141         self.scale_grad_by_freq = scale_grad_by_freq
    142         if _weight is None:
--> 143             self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs),
    144                                     requires_grad=not _freeze)
    145             self.reset_parameters()

TypeError: empty() received an invalid combination of arguments - got (tuple, dtype=NoneType, device=str), but expected one of:

  • (tuple of ints size, *, tuple of names names, torch.memory_format memory_format, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
  • (tuple of ints size, *, torch.memory_format memory_format, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)

Could you print all inputs to this line of code, please?

 self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs),
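For example, something like this inside your GPT.__init__ (just a sketch, reusing the variable names from your post) would show what each nn.Embedding call actually receives:

    # temporary debug prints, placed just before the embedding layers are created
    print("token_embedding args:", vocab_size, n_embd, type(vocab_size), type(n_embd))
    print("position_embedding args:", max_seq_len, n_embd, type(max_seq_len), type(n_embd))
    print("device:", device, type(device))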

Hello, sure.
num_embeddings = 64

I'm a bit confused about the embedding_dim value, as I didn't set that anywhere in my code. Is it actually related to another variable name?

Your model’s __init__ is most likely defining it, so try to isolate the arguments to understand why the line of code fails.
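One way to do that (only a sketch, reusing the signature from your post) is to print each parameter at the top of __init__, and to construct the model with keyword arguments so every value is bound to the parameter you intend:

    def __init__(self, n_embd, n_head, vocab_size, max_seq_len=32, device="cpu"):
        super().__init__()
        # debug: show what each parameter actually received
        for name, value in [("n_embd", n_embd), ("n_head", n_head), ("vocab_size", vocab_size),
                            ("max_seq_len", max_seq_len), ("device", device)]:
            print(name, type(value), value)
        ...

    # call site: bind arguments explicitly instead of relying on positional order
    model = GPT(n_embd=embed_size, n_head=4, vocab_size=vocab_size,
                max_seq_len=32, device=device)

Comparing those prints with the positional order in GPT(embed_size, 4, vocab_size, device) from your example usage should show which parameter the device argument is actually landing in.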