Hello all,
I tried to load a leveldb dataset created by regular Caffe (i.e., storing serialized `Datum` protos) into Caffe2 using `brew.db_input` from the Python API; however, I got the following error (Case #1):
WARNING: Logging before InitGoogleLogging() is written to STDERR
E1124 13:53:47.919953 8191 prefetch_op.h:110] Prefetching error [enforce fail at tensor_protos_db_input.h:68] protos.protos_size() == OutputSize().
E1124 13:53:47.920176 8161 prefetch_op.h:83] Prefetching failed.
E1124 13:53:47.936815 8161 net_simple.cc:63] Operator failed: input: "dbreader_./data/dpnet_dpnet/TORCS_Training_1F" output: "data" output: "label" name: "" type: "TensorProtosDBInput" arg { name: "batch_size" i: 64 } device_option { device_type: 1 cuda_gpu_id: 0 }
WARNING:caffe2.python.workspace:Original python traceback for operator `-1107198840` in network `train_net` in exception above (most recent call last):
Traceback (most recent call last):
File "CNNTrainer_dpnet_dpnet.py", line 23, in <module>
stepsize=8000
File "/home/carlos/Documents/git/Caffe2_scripts/caffe2_torcs_predictor/CNNCreator_dpnet_dpnet.py", line 158, in train
workspace.RunNet(train_model.net)
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 217, in RunNet
StringifyNetName(name), num_iter, allow_fail,
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 178, in CallWithExceptionIntercept
return func(*args, **kwargs)
RuntimeError: [enforce fail at pybind_state.cc:1025] success. Error running net train_net
I also tried to load the dataset using brew.image_input but I got the following error (Case #2) :
WARNING:caffe2.python.workspace:Original python traceback for operator `1222868328` in network `train_net` in exception above (most recent call last):
Traceback (most recent call last):
File "CNNTrainer_dpnet_dpnet.py", line 24, in <module>
stepsize=8000
File "/home/carlos/Documents/git/Caffe2_scripts/caffe2_torcs_predictor/CNNCreator_dpnet_dpnet.py", line 195, in train
workspace.CreateNet(train_model.net, overwrite=True)
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 152, in CreateNet
StringifyProto(net), overwrite,
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 178, in CallWithExceptionIntercept
return func(*args, **kwargs)
RuntimeError: [enforce fail at cast.h:15] TensorProto_DataType_Parse(s, &to). Unknown 'to' argument: LEVELDB
Steps to reproduce the behavior:
(Case #1):
def add_input(self, model, batch_size, db, db_type, device_opts):
    """Attach a TensorProtosDBInput reader to *model*; return (data, label).

    All input operators are created under *device_opts*. Gradient flow is
    cut at the data blob, since the raw input needs no backward pass.
    """
    with core.DeviceScope(device_opts):
        data_blob, label_blob = brew.db_input(
            model,
            blobs_out=["data", "label"],
            batch_size=batch_size,
            db=db,
            db_type=db_type,
        )
        # The input is constant w.r.t. the loss — no gradient needed.
        data_blob = model.StopGradient(data_blob, data_blob)
    return data_blob, label_blob
.
.
.
# == Training model ==
# Build the training net: input reader, forward model, training operators,
# accuracy metric and weight decay, all on the requested device.
train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label = self.add_input(
    train_model,
    batch_size=batch_size,
    db=os.path.join(self._data_dir_, 'TORCS_Training_1F'),
    db_type='leveldb',
    device_opts=device_opts,
)
predictions = self.create_model(train_model, data, label, device_opts=device_opts)
self.add_training_operators(train_model, predictions, label, device_opts,
                            opt_type, base_learning_rate, policy, stepsize,
                            epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
    brew.add_weight_decay(train_model, weight_decay)

# Initialize and create the training network
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Main Training Loop
print("== Starting Training for " + str(num_epoch) + " epochs ==")
for i in range(num_epoch):
    workspace.RunNet(train_model.net)
    if i % 50 == 0:
        # Use the print() call form consistently (the surrounding code already
        # does) instead of the Python-2-only `print` statement.
        print('Iter ' + str(i) + ': '
              + 'Loss ' + str(workspace.FetchBlob("loss")) + ' - '
              + 'Accuracy ' + str(workspace.FetchBlob('accuracy')))
print("Training done")
(Case #2):
def AddImageInput(self, model, reader, batch_size, db_type, is_test):
    '''
    Load image and label data from *reader* and apply the ImageInput
    transformations (random cropping, mirroring, ...) to the images.

    `db_type` is kept for interface compatibility but is no longer
    forwarded: the database format is already fixed by the reader
    (CreateDB), while ImageInput's `output_type` argument is the tensor
    data type of the output blob ('float' or 'float16'). Passing the db
    format string here caused the reported CreateNet failure:
    "[enforce fail at cast.h:15] ... Unknown 'to' argument: LEVELDB".
    '''
    data, label = brew.image_input(
        model,
        reader, ["data", "label"],
        batch_size=batch_size,
        color=3,
        output_type='float',  # fix: was output_type=db_type ('leveldb')
        use_caffe_datum=False,
        crop=0,  # NOTE(review): ImageInput normally needs crop > 0 (output H/W) — confirm against the image size
        mirror=0,
        is_test=is_test,
    )
    # Input data needs no gradient for the backward pass.
    data = model.StopGradient(data, data)
    return data, label
.
.
.
# == Training model ==
# Build the training net: DB reader + ImageInput pipeline, forward model,
# training operators, accuracy metric and weight decay.
train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
reader = train_model.CreateDB(
    "reader",
    db=os.path.join(self._data_dir_, 'torcs-train-nchw-leveldb'),
    db_type='leveldb',
)
data, label = self.AddImageInput(train_model, reader=reader,
                                 batch_size=batch_size,
                                 db_type='leveldb', is_test=False)
predictions = self.create_model(train_model, data, label, device_opts=device_opts)
self.add_training_operators(train_model, predictions, label, device_opts,
                            opt_type, base_learning_rate, policy, stepsize,
                            epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
    brew.add_weight_decay(train_model, weight_decay)

# Initialize and create the training network
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Main Training Loop
print("== Starting Training for " + str(num_epoch) + " epochs ==")
for i in range(num_epoch):
    workspace.RunNet(train_model.net)
    if i % 50 == 0:
        # Use the print() call form consistently (the surrounding code already
        # does) instead of the Python-2-only `print` statement.
        print('Iter ' + str(i) + ': '
              + 'Loss ' + str(workspace.FetchBlob("loss")) + ' - '
              + 'Accuracy ' + str(workspace.FetchBlob('accuracy')))
print("Training done")
The leveldb dataset employed is the one used by DeepDriving (http://deepdriving.cs.princeton.edu/) together with Caffe (the predecessor of Caffe2). Therefore, I know that the dataset itself is valid and can be loaded by Caffe.