Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_mm

This is my model (only showing the __init__() and forward() functions):

class BERTplusAoA(nn.Module):
    def __init__(self, config, options):
        super(BERTplusAoA, self).__init__()
        self.bert = BertModel.from_pretrained(
            options.model_name_or_path,
            from_tf=bool(".ckpt" in options.model_name_or_path),
            config=config,
            cache_dir=options.cache_dir if options.cache_dir else None,
        )
        self.l0 = nn.Linear(1024, 2)
        self.lq = nn.Linear(1024, 256)
        self.lc = nn.Linear(1024, 256)
        self.soft = nn.Softmax(dim=-1)
        self.wb = torch.sigmoid(nn.Parameter(torch.zeros(1)))
        self.we = torch.sigmoid(nn.Parameter(torch.zeros(1)))

    def forward(self, input_ids, attention_mask, token_type_ids, start_positions, end_positions):
        sequence_out, _ = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        # (batch_size, num_tokens, 1024)
        logits = self.l0(sequence_out)
        # (batch_size, num_tokens, 2)
        start_logits, end_logits = logits.split(1, dim=-1)
        # each: (batch_size, num_tokens, 1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)
        # each: (batch_size, num_tokens)

        lb = F.softmax(start_logits, dim=-1)
        le = F.softmax(end_logits, dim=-1)

        hq, hc = self.splitting_(sequence_out, token_type_ids)
        # hq, hc are lists of tensors

        hq, hq_len = self._batchify(hq, include_lengths=True)
        hc, hc_len = self._batchify(hc, include_lengths=True)
        # hq, hc are now tensors of dim (batch_size, max_length of q or c, 1024)

        Hq = self.lq(hq)
        Hq_b, Hq_e = Hq.split(128, dim=-1)

        Hc = self.lc(hc)
        Hc_b, Hc_e = Hc.split(128, dim=-1)
        ...

I get the following error:

Epoch:   0% 0/20 [00:00<?, ?it/s]
Iteration:   0% 0/6464 [00:00<?, ?it/s]Traceback (most recent call last):
  File "run_techqa.py", line 623, in <module>
    main()
  File "run_techqa.py", line 617, in main
    model = train(args, train_dataset, model, optimizer, tokenizer, model_evaluator)
  File "run_techqa.py", line 223, in train
    outputs = model(**inputs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/apex/amp/_initialize.py", line 197, in new_fwd
    **applier(kwargs, input_caster))
  File "/content/MyDrive/IBM/TechQA-Base/techqa-master/model_techqa.py", line 109, in forward
    Hq = self.lq(hq)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
    output = input.matmul(weight.t())
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_mm

Did you try sending input to the GPU?

Hi @Kushagra_Bhatia,
The issue is with how you pass your inputs to the model during training. Can you post the part of your code where you are training?

Hello, @harsha_g and @SANTOSH_S. I believe the dataset is on CUDA.
The training loop is shown below. Also, args.device = device(type='cuda').

    for _ in train_iterator:
        # train_iterator is basically range(0, total_epochs). However in case we resume training then
        # train_iterator becomes range(completed_epochs, total_epochs)
        # In every epoch we compute the mini-batches again
        train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
         
        for step, batch in enumerate(epoch_iterator):
            # batch is an element in dataloader. i.e. effectively a mini-batch

            # Skip past any already trained steps if resuming training. i.e. number of mini-batches trained already in the epoch
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)

            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "start_positions": batch[3],
                "end_positions": batch[4],
            }

            if args.model_type in ["roberta"]:
                del inputs["token_type_ids"]  # RoBERTa doesn't require token_type_ids

            outputs = model(**inputs)
            # model outputs are always tuple in transformers (see doc)

Then, I am assuming you also put your model on the GPU. Didn’t you? :wink:

Yes, I did put it on the GPU.

def load_model(args, model_class, config):

    model = BERTplusAoA(config, args)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)
...

I am not entirely sure, but could it be that you have another model, BertModel, inside your actual model, and that it also needs to be put on the device?

I imported it from transformers library.

from transformers import BertModel

I get that. I am just speculating whether the following calls will help. I am not entirely sure; I’d just try them.

BertModel = BertModel.to(device)

or

self.bert = BertModel.to(device).from_pretrained(
    options.model_name_or_path,
    from_tf=bool(".ckpt" in options.model_name_or_path),
    config=config,
    cache_dir=options.cache_dir if options.cache_dir else None,
)

No, it doesn’t resolve the error. Rather, it throws another one:

  BertModel.to(options.device)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 425, in to
    return self._apply(convert)
AttributeError: 'torch.device' object has no attribute '_apply'

I don’t think we are supposed to move the BertModel class itself to the device. For example, the following code snippet is from huggingface’s official implementation of the BertForQuestionAnswering model.

class BertForQuestionAnswering(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()
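For what it’s worth, nn.Module.to(device) moves every registered submodule along with its parameters, so self.bert should already follow model.to(args.device). A minimal sketch (toy module names; assumes a CUDA device is available):

import torch
import torch.nn as nn

class Wrapper(nn.Module):
    def __init__(self):
        super().__init__()
        self.inner = nn.Linear(4, 2)  # registered as a submodule

wrapper = Wrapper().to("cuda")
# The nested module's parameters moved along with the parent:
print(next(wrapper.inner.parameters()).device)  # cuda:0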

Got it. Thanks for trying though. :+1:

@Kushagra_Bhatia Another keen look at the traceback and another thought: it’s evident the issue is at the Hq = self.lq(hq) line :point_up:. How about passing the device argument to the model and then putting hq (and later hc) on the device? I guess I was a little sloppy earlier. :slight_smile:
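In code, that suggestion would look roughly like this inside forward (a sketch, assuming the padded tensors come back on the CPU while the model sits on CUDA):

hq, hq_len = self._batchify(hq, include_lengths=True)
hc, hc_len = self._batchify(hc, include_lengths=True)
# Move the padded batches onto the same device as the model's weights:
hq = hq.to(input_ids.device)
hc = hc.to(input_ids.device)

Hq = self.lq(hq)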


That actually solved my error. However, I am getting a new error now, which I am not able to resolve. It is as follows:

  File "run_techqa.py", line 623, in <module>
    main()
  File "run_techqa.py", line 617, in main
    model = train(args, train_dataset, model, optimizer, tokenizer, model_evaluator)
  File "run_techqa.py", line 223, in train
    outputs = model(**inputs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/apex/amp/_initialize.py", line 197, in new_fwd
    **applier(kwargs, input_caster))
  File "/content/MyDrive/IBM/TechQA-Base/techqa-master/model_techqa.py", line 113, in forward
    Hq = self.lq(hq)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
    output = input.matmul(weight.t())
RuntimeError: Expected object of scalar type Float but got scalar type Half for argument #2 'mat2' in call to _th_mm

I used two functions in the forward pass which I had not shown previously:

    def splitting_(self, ids, type_ids):
        # Split each sequence's BERT output into question tokens and context
        # tokens, using the boundaries marked by token_type_ids.
        ans = []
        for i, lis in enumerate(ids):
            typei = type_ids[i]
            ends = []
            for j in range(len(typei) - 1):
                if typei[j] != typei[j + 1]:
                    ends.append(j + 1)
            if len(ends) == 1:      # no padding after the context
                qend = ends[0]
                dend = len(typei)
            else:                   # second boundary marks the start of padding
                qend, dend = ends[0], ends[1]
            qtns = lis[:qend]
            dtns = lis[qend:dend]
            ans.append([qtns, dtns])
        q = [i[0] for i in ans]
        d = [i[1] for i in ans]
        return q, d

    # Takes a list of n tensors (sentences encoded using the tokenizer) and
    # pads them into a single tensor with n rows.
    def _batchify(self, data, align_right=False, include_lengths=False):
        lengths = [x.size(0) for x in data]
        max_length = max(lengths)
        # Padding template: a fresh CPU float32 vector, stacked up to shape
        # (len(data), max_length, 1024).
        tens = torch.rand(1024, dtype=torch.float32)
        out = torch.stack([torch.stack([tens for _ in range(max_length)]) for _ in range(len(data))])
        for i in range(len(data)):
            data_length = data[i].size(0)
            offset = max_length - data_length if align_right else 0
            out[i].narrow(0, offset, data_length).copy_(data[i])
        if include_lengths:
            return out, lengths
        else:
            return out
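A toy run of _batchify (a sketch; model stands for a BERTplusAoA instance and the shapes are made up) shows why both tracebacks point at hq: the padding template is a fresh CPU float32 tensor, so the stacked output is CPU/float32 regardless of where, and in what precision, the BERT activations were produced:

import torch

# Fake half-precision CUDA activations, the way apex amp would hand them over
# (hypothetical toy data; assumes a CUDA device is available):
q = [torch.randn(3, 1024, device="cuda").half(),
     torch.randn(5, 1024, device="cuda").half()]

out, lengths = model._batchify(q, include_lengths=True)
print(out.device, out.dtype)  # cpu torch.float32 -- matches both errors
print(lengths)                # [3, 5]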

Would really appreciate your help!

Note: I have already tried hq = hq.float() and hq = torch.tensor(hq, dtype=torch.float32).

@Kushagra_Bhatia Glad it worked. Now, on to the next one. Putting the two errors in context, it looks like something’s off with the weights (self.wb and self.we) this time, not hq. Try changing the type of the weights. I am sure that will take care of it. Let us know how it goes.

I don’t exactly understand; the traceback doesn’t explicitly mention self.wb and self.we, so how do you infer it’s the two weights? They are defined as self.wb = torch.sigmoid(nn.Parameter(torch.zeros(1))). I wanted a weight to combine two vectors, used like ans_beg = torch.mul(self.wb, lb) + torch.mul(1 - self.wb, s1): basically a learnable weight between 0 and 1 so that the two vectors can be effectively averaged. What do you think I need to change in the definition of wb and we?
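As an aside on that definition: torch.sigmoid(nn.Parameter(torch.zeros(1))) returns a plain tensor, so wb and we end up unregistered; they will neither be trained nor moved by model.to(device). A common pattern (a sketch, not a confirmed fix for this thread) is to store the raw parameter and squash it at use time:

import torch
import torch.nn as nn

class Gate(nn.Module):
    def __init__(self):
        super().__init__()
        # Raw weight stays a registered Parameter: learnable, follows .to(device)
        self.wb = nn.Parameter(torch.zeros(1))

    def forward(self, lb, s1):
        wb = torch.sigmoid(self.wb)      # squashed into (0, 1) in the forward pass
        return wb * lb + (1 - wb) * s1   # learnable convex combination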

Lecture time :slight_smile:

File "/content/MyDrive/IBM/TechQA-Base/techqa-master/model_techqa.py", line 113, in forward
   Hq = self.lq(hq)
 File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
   result = self.forward(*input, **kwargs)
 File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
   return F.linear(input, self.weight, self.bias)
 File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
   output = input.matmul(weight.t())
RuntimeError: Expected object of scalar type Float but got scalar type Half for argument #2 'mat2' in call to _th_mm

If you look carefully at the traceback, you will realize that the error happened at the line Hq = self.lq(hq). What’s happening there? You are making a call to the forward (the next frame) of that module lq. What does it have? An F.linear call. And what happens in that call? input.matmul(weight.t()). So, if you trace back, the input to the module lq is hq, and the weight is? You guessed it right: torch.sigmoid(nn.Parameter(torch.zeros(1))).

Now, the first error you posted said RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_mm. This means there’s an mm (matrix multiplication) happening on the device that takes two inputs, the first being input and the second being weight.t(). So the earlier error was solved by sending the input, aka hq, to the device. Likewise, the second error will be taken care of by fixing the datatype of weight.t(), aka self.wb. How? By specifying .float() or .double() when you call torch.zeros().
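As a minimal standalone illustration of that dtype rule (a toy snippet; the exact wording of the error varies with the PyTorch version):

import torch

a = torch.randn(2, 3, device="cuda")         # float32 activations
w = torch.randn(4, 3, device="cuda").half()  # float16 (Half) weight
out = a.matmul(w.t())  # RuntimeError: Float/Half dtype mismatch in mm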

I know I wasn’t extremely clear but you get the point. :slight_smile:


The F.linear call comes from nn.Linear in the torch.nn module. Inside the nn.Linear layer, the code is as follows (official documentation: https://pytorch.org/docs/master/_modules/torch/nn/modules/linear.html#Linear):

class Linear(Module):

    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> Tensor:
        return F.linear(input, self.weight, self.bias)

As you can see, the line

self.weight = Parameter(torch.Tensor(out_features, in_features))

defines the weight that is passed to F.linear, which performs the output = input.matmul(weight.t()) operation.

Please correct me if I am wrong.


Right on the money.

So, did you manage to solve your latest error?

Ah, I see. I got that part wrong: I misread Sigmoid as Linear. :man_facepalming: But you got it right.

Did you try double or float64 instead?

If I do hq = hq.double() or hq = torch.tensor(hq, dtype=torch.float64) before moving hq to the device, I get the error:

RuntimeError: Expected object of scalar type Float but got scalar type Half for argument #2 'mat2' in call to _th_mm

If I first move hq to the device and then do hq = torch.tensor(hq, dtype=torch.float64), I get:

Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_mm
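One untested sketch that satisfies both constraints at once is to match hq to the linear layer’s own device and dtype immediately before the call (this assumes amp left self.lq.weight in half precision):

# Align hq with self.lq's weight in both device and dtype before the matmul:
w = self.lq.weight
hq = hq.to(device=w.device, dtype=w.dtype)
Hq = self.lq(hq)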