Hello all,
I tried to load a leveldb dataset created by regular Caffe (i.e., storing serialized `Datum` protos) into Caffe2 using `brew.db_input` from the Python API; however, I got the following error (Case #1):
WARNING: Logging before InitGoogleLogging() is written to STDERR
E1124 13:53:47.919953 8191 prefetch_op.h:110] Prefetching error [enforce fail at tensor_protos_db_input.h:68] protos.protos_size() == OutputSize().
E1124 13:53:47.920176 8161 prefetch_op.h:83] Prefetching failed.
E1124 13:53:47.936815 8161 net_simple.cc:63] Operator failed: input: "dbreader_./data/dpnet_dpnet/TORCS_Training_1F" output: "data" output: "label" name: "" type: "TensorProtosDBInput" arg { name: "batch_size" i: 64 } device_option { device_type: 1 cuda_gpu_id: 0 }
WARNING:caffe2.python.workspace:Original python traceback for operator `-1107198840` in network `train_net` in exception above (most recent call last):
Traceback (most recent call last):
File "CNNTrainer_dpnet_dpnet.py", line 23, in <module>
stepsize=8000
File "/home/carlos/Documents/git/Caffe2_scripts/caffe2_torcs_predictor/CNNCreator_dpnet_dpnet.py", line 158, in train
workspace.RunNet(train_model.net)
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 217, in RunNet
StringifyNetName(name), num_iter, allow_fail,
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 178, in CallWithExceptionIntercept
return func(*args, **kwargs)
RuntimeError: [enforce fail at pybind_state.cc:1025] success. Error running net train_net
I also tried to load the dataset using brew.image_input but I got the following error (Case #2) :
WARNING:caffe2.python.workspace:Original python traceback for operator `1222868328` in network `train_net` in exception above (most recent call last):
Traceback (most recent call last):
File "CNNTrainer_dpnet_dpnet.py", line 24, in <module>
stepsize=8000
File "/home/carlos/Documents/git/Caffe2_scripts/caffe2_torcs_predictor/CNNCreator_dpnet_dpnet.py", line 195, in train
workspace.CreateNet(train_model.net, overwrite=True)
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 152, in CreateNet
StringifyProto(net), overwrite,
File "/home/carlos/Documents/git/pytorch/build/caffe2/python/workspace.py", line 178, in CallWithExceptionIntercept
return func(*args, **kwargs)
RuntimeError: [enforce fail at cast.h:15] TensorProto_DataType_Parse(s, &to). Unknown 'to' argument: LEVELDB
Steps to reproduce the behavior:
(Case #1):
def add_input(self, model, batch_size, db, db_type, device_opts):
    """Attach a TensorProtosDBInput reader to *model*; return (data, label).

    All input operators are created under *device_opts*. Gradient flow is
    cut at the data blob, since the raw input needs no backward pass.
    """
    with core.DeviceScope(device_opts):
        data_blob, label_blob = brew.db_input(
            model,
            blobs_out=["data", "label"],
            batch_size=batch_size,
            db=db,
            db_type=db_type,
        )
        # The input is constant w.r.t. the loss — no gradient needed.
        data_blob = model.StopGradient(data_blob, data_blob)
    return data_blob, label_blob
.
.
.
# == Training model ==
# Build the training net: input reader, forward model, training operators,
# accuracy metric and weight decay, all on the requested device.
train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label = self.add_input(
    train_model,
    batch_size=batch_size,
    db=os.path.join(self._data_dir_, 'TORCS_Training_1F'),
    db_type='leveldb',
    device_opts=device_opts,
)
predictions = self.create_model(train_model, data, label, device_opts=device_opts)
self.add_training_operators(train_model, predictions, label, device_opts,
                            opt_type, base_learning_rate, policy, stepsize,
                            epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
    brew.add_weight_decay(train_model, weight_decay)

# Initialize and create the training network
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Main Training Loop
print("== Starting Training for " + str(num_epoch) + " epochs ==")
for i in range(num_epoch):
    workspace.RunNet(train_model.net)
    if i % 50 == 0:
        # Use the print() call form consistently (the surrounding code already
        # does) instead of the Python-2-only `print` statement.
        print('Iter ' + str(i) + ': '
              + 'Loss ' + str(workspace.FetchBlob("loss")) + ' - '
              + 'Accuracy ' + str(workspace.FetchBlob('accuracy')))
print("Training done")
(Case #2):
def AddImageInput(self, model, reader, batch_size, db_type, is_test):
    '''
    Load image and label data from *reader* and apply the ImageInput
    transformations (random cropping, mirroring, ...) to the images.

    `db_type` is kept for interface compatibility but is no longer
    forwarded: the database format is already fixed by the reader
    (CreateDB), while ImageInput's `output_type` argument is the tensor
    data type of the output blob ('float' or 'float16'). Passing the db
    format string here caused the reported CreateNet failure:
    "[enforce fail at cast.h:15] ... Unknown 'to' argument: LEVELDB".
    '''
    data, label = brew.image_input(
        model,
        reader, ["data", "label"],
        batch_size=batch_size,
        color=3,
        output_type='float',  # fix: was output_type=db_type ('leveldb')
        use_caffe_datum=False,
        crop=0,  # NOTE(review): ImageInput normally needs crop > 0 (output H/W) — confirm against the image size
        mirror=0,
        is_test=is_test,
    )
    # Input data needs no gradient for the backward pass.
    data = model.StopGradient(data, data)
    return data, label
.
.
.
# == Training model ==
# Build the training net: DB reader + ImageInput pipeline, forward model,
# training operators, accuracy metric and weight decay.
train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
reader = train_model.CreateDB(
    "reader",
    db=os.path.join(self._data_dir_, 'torcs-train-nchw-leveldb'),
    db_type='leveldb',
)
data, label = self.AddImageInput(train_model, reader=reader,
                                 batch_size=batch_size,
                                 db_type='leveldb', is_test=False)
predictions = self.create_model(train_model, data, label, device_opts=device_opts)
self.add_training_operators(train_model, predictions, label, device_opts,
                            opt_type, base_learning_rate, policy, stepsize,
                            epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
    brew.add_weight_decay(train_model, weight_decay)

# Initialize and create the training network
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Main Training Loop
print("== Starting Training for " + str(num_epoch) + " epochs ==")
for i in range(num_epoch):
    workspace.RunNet(train_model.net)
    if i % 50 == 0:
        # Use the print() call form consistently (the surrounding code already
        # does) instead of the Python-2-only `print` statement.
        print('Iter ' + str(i) + ': '
              + 'Loss ' + str(workspace.FetchBlob("loss")) + ' - '
              + 'Accuracy ' + str(workspace.FetchBlob('accuracy')))
print("Training done")
The leveldb dataset employed is the one used by DeepDriving (http://deepdriving.cs.princeton.edu/) together with Caffe (the predecessor of Caffe2). Therefore, I know that the dataset itself is valid and can be loaded by Caffe.