Attribute lookup Field.<lambda> on torchtext.data.field failed


(Eduardo Freitas) #1

This is the error:

Traceback (most recent call last):
File "preprocess.py", line 166, in <module>
main()
File "preprocess.py", line 162, in main
build_save_vocab(train_dataset_files, fields, opt)
File "preprocess.py", line 107, in build_save_vocab
torch.save(fields, vocab_path)
File "/opt/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/serialization.py", line 219, in save
return _with_file_like(f, "wb", lambda f: _save(obj, f, pickle_module, pickle_protocol))
File "/opt/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/serialization.py", line 144, in _with_file_like
return body(f)
File "/opt/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/serialization.py", line 219, in <lambda>
return _with_file_like(f, "wb", lambda f: _save(obj, f, pickle_module, pickle_protocol))
File "/opt/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/serialization.py", line 292, in _save
pickler.dump(obj)
_pickle.PicklingError: Can't pickle <function Field.<lambda> at 0x7f24904688c8>: attribute lookup Field.<lambda> on torchtext.data.field failed

Conditions:

  • PyTorch 1.0.0; Python 3.6; Anaconda 3.
  • `conda list` returns:
torch                      0.4.1                                pypi_0    pypi
torch-nightly              1.0.0.dev20190125                    pypi_0    pypi
torchtext                  0.4.0                                pypi_0    pypi
torchvision-cpu            0.2.1                                py_2    soumith

And this is the serialization.py:

def _save(obj, f, pickle_module, pickle_protocol):
    """Serialize ``obj`` into the file-like object ``f``.

    The stream is written in this order: the magic number, the protocol
    version, a system-info dict, the pickled ``obj`` (with storages and
    ``nn.Module`` subclasses replaced by persistent-id tuples), the sorted
    list of storage keys, and finally the raw contents of every storage that
    was referenced while pickling.

    Args:
        obj: The object to save.
        f: A writable file-like object.
        pickle_module: Module used for pickling; must provide ``dump`` and
            ``Pickler``.
        pickle_protocol: Protocol number forwarded to every dump call.

    Raises:
        RuntimeError: On Python 2, if ``f`` is a ``StringIO.StringIO``.

    Errors raised by the pickler itself are not caught here and propagate to
    the caller (for example a ``PicklingError`` when ``obj`` contains a
    lambda, which pickle cannot look up by name).
    """
    if sys.version_info[0] == 2:
        import StringIO
        if isinstance(f, StringIO.StringIO):
            msg = ('torch.save received unsupported StringIO.StringIO file object, whose '
                   'write method does not return the number of bytes written. '
                   'Please use something like io.BytesIO for torch.save instead.')
            raise RuntimeError(msg)

    import torch.nn as nn
    # nn.Module subclasses already emitted, so each class is recorded once.
    serialized_container_types = {}
    # Storages seen while pickling, keyed by str(_cdata); written out at the end.
    serialized_storages = {}

    def persistent_id(obj):
        """Return a persistent-id tuple for module classes and storages.

        Returns ``None`` for everything else (and for module classes that
        were already recorded), which tells the pickler to serialize the
        object normally.
        """
        # FIXME: the docs say that persistent_id should only return a string
        # but torch store returns tuples. This works only in the binary protocol
        # see
        # https://docs.python.org/2/library/pickle.html#pickling-and-unpickling-external-objects
        # https://github.com/python/cpython/blob/master/Lib/pickle.py#L527-L537
        if isinstance(obj, type) and issubclass(obj, nn.Module):
            if obj in serialized_container_types:
                return None
            serialized_container_types[obj] = True
            source_file = source = None
            try:
                source_file = inspect.getsourcefile(obj)
                source = inspect.getsource(obj)
            except Exception:  # saving the source is optional, so we can ignore any errors
                warnings.warn("Couldn't retrieve source code for container of "
                              "type " + obj.__name__ + ". It won't be checked "
                              "for correctness upon loading.")
            return ('module', obj, source_file, source)
        elif torch.is_storage(obj):
            storage_type = normalize_storage_type(type(obj))
            obj_key = str(obj._cdata)
            location = location_tag(obj)
            serialized_storages[obj_key] = obj
            # The previous code derived this from `obj._cdata != obj._cdata`,
            # which is always False, so the view branch could never run. The
            # slot is kept (always None) so the tuple layout stays compatible
            # with the old serialization format, which supported storage views.
            view_metadata = None

            return ('storage',
                    storage_type,
                    obj_key,
                    location,
                    obj.size(),
                    view_metadata)

        return None

    sys_info = dict(
        protocol_version=PROTOCOL_VERSION,
        little_endian=sys.byteorder == 'little',
        type_sizes=dict(
            short=SHORT_SIZE,
            int=INT_SIZE,
            long=LONG_SIZE,
        ),
    )

    # Header: three independent pickles precede the object itself.
    pickle_module.dump(MAGIC_NUMBER, f, protocol=pickle_protocol)
    pickle_module.dump(PROTOCOL_VERSION, f, protocol=pickle_protocol)
    pickle_module.dump(sys_info, f, protocol=pickle_protocol)
    pickler = pickle_module.Pickler(f, protocol=pickle_protocol)
    pickler.persistent_id = persistent_id
    pickler.dump(obj)

    # Storage keys are only known after the dump above has populated
    # serialized_storages; they are written sorted, followed by the data.
    serialized_storage_keys = sorted(serialized_storages.keys())
    pickle_module.dump(serialized_storage_keys, f, protocol=pickle_protocol)
    # Flush pickled output before the storages write to the file themselves.
    f.flush()
    for key in serialized_storage_keys:
        serialized_storages[key]._write_file(f, _should_read_directly(f))