unable to load NER pipeline with nlp.from_disk() - python

I am trying to load a pre-trained pipeline into my code like this:
nlp = de_core_news_sm.load()
nlp = nlp.from_disk('./TRAINED/Background/')
but I get a ValueError saying:
ValueError Traceback (most recent call last)
<ipython-input-4-1f41fefa6daa> in <module>
1 nlp = de_core_news_sm.load()
----> 2 nlp = nlp.from_disk('./TRAINED/Background/')
3 print(nlp)
/opt/anaconda3/lib/python3.8/site-packages/spacy/language.py in from_disk(self, path, exclude, disable)
972 # Convert to list here in case exclude is (default) tuple
973 exclude = list(exclude) + ["vocab"]
--> 974 util.from_disk(path, deserializers, exclude)
975 self._path = path
976 return self
/opt/anaconda3/lib/python3.8/site-packages/spacy/util.py in from_disk(path, readers, exclude)
688 # Split to support file names like meta.json
689 if key.split(".")[0] not in exclude:
--> 690 reader(path / key)
691 return path
692
/opt/anaconda3/lib/python3.8/site-packages/spacy/language.py in deserialize_vocab(path)
948 def deserialize_vocab(path):
949 if path.exists():
--> 950 self.vocab.from_disk(path)
951 _fix_pretrained_vectors_name(self)
952
vocab.pyx in spacy.vocab.Vocab.from_disk()
strings.pyx in spacy.strings.StringStore.from_disk()
/opt/anaconda3/lib/python3.8/site-packages/srsly/_json_api.py in read_json(location)
48 data = sys.stdin.read()
49 return ujson.loads(data)
---> 50 file_path = force_path(location)
51 with file_path.open("r", encoding="utf8") as f:
52 return ujson.load(f)
/opt/anaconda3/lib/python3.8/site-packages/srsly/util.py in force_path(location, require_exists)
19 location = Path(location)
20 if require_exists and not location.exists():
---> 21 raise ValueError("Can't read file: {}".format(location))
22 return location
23
ValueError: Can't read file: TRAINED/Background/vocab/strings.json
If I open the vocab folder on macOS, there is no strings.json file, just a few binary files. What can I do to load the model properly?

You need a directory structure like the one below in order to load a spaCy model. In your case, there is no strings.json file in the directory, which is what throws the error.
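For a spaCy v2 pipeline (which the traceback suggests), the saved model directory typically looks roughly like this; the exact component folders (ner, tagger, parser) depend on the pipeline:

TRAINED/Background/
├── meta.json
├── tokenizer
├── ner/
│   ├── cfg
│   ├── model
│   └── moves
└── vocab/
    ├── key2row
    ├── lexemes.bin
    ├── strings.json
    └── vectors

The reliable way to get this layout is to let spaCy write and read it itself. A minimal sketch, assuming nlp is your trained pipeline object and using the path from the question:

import spacy

# Write the full pipeline, including vocab/strings.json, in one call
nlp.to_disk('./TRAINED/Background/')

# Load the whole pipeline back instead of patching a packaged model
nlp = spacy.load('./TRAINED/Background/')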

Related

Joblib process not working when loading model

I have created an image classification model using the pre-trained model InceptionV3. After I trained the model on my dataset I saved it using joblib. When trying to load the model I'm getting the error "Unsuccessful TensorSliceReader constructor: Failed to find any matching files for ram://1ea4479d-6a25-4562-965a-428f7eb33342/variables/variables
You may be trying to load on a different device from the computational device. Consider setting the experimental_io_device option in tf.saved_model.LoadOptions to the io_device such as '/job:localhost'."
Any idea why this message is appearing, or is it because you can't use joblib to save a model made from a pre-trained model? Below is the code and the error.
import joblib

# Save the trained model with joblib
joblib.dump(inceptionv3_model, 'inceptV3_model.pkl')

# Load it back (this is where the error occurs)
model_inceptionv3 = joblib.load('inceptV3_model.pkl')
FileNotFoundError Traceback (most recent call last)
<ipython-input-14-8ed26b03fd7d> in <module>
1 # loading the model
----> 2 model_inceptionv3 = joblib.load('C:/Users/Indranil/inceptV3_model.pkl')
~\anaconda3\lib\site-packages\joblib\numpy_pickle.py in load(filename, mmap_mode)
583 return load_compatibility(fobj)
584
--> 585 obj = _unpickle(fobj, filename, mmap_mode)
586 return obj
~\anaconda3\lib\site-packages\joblib\numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
502 obj = None
503 try:
--> 504 obj = unpickler.load()
505 if unpickler.compat_mode:
506 warnings.warn("The file '%s' has been generated with a "
~\anaconda3\lib\pickle.py in load(self)
1208 raise EOFError
1209 assert isinstance(key, bytes_types)
-> 1210 dispatch[key[0]](self)
1211 except _Stop as stopinst:
1212 return stopinst.value
~\anaconda3\lib\pickle.py in load_reduce(self)
1585 args = stack.pop()
1586 func = stack[-1]
-> 1587 stack[-1] = func(*args)
1588 dispatch[REDUCE[0]] = load_reduce
1589
~\anaconda3\lib\site-packages\keras\saving\pickle_utils.py in deserialize_model_from_bytecode(serialized_model)
46 with tf.io.gfile.GFile(dest_path, "wb") as f:
47 f.write(archive.extractfile(name).read())
---> 48 model = save_module.load_model(temp_dir)
49 tf.io.gfile.rmtree(temp_dir)
50 return model
~\anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\anaconda3\lib\site-packages\tensorflow\python\saved_model\load.py in load_internal(export_dir, tags, options, loader_cls, filters)
975 ckpt_options, options, filters)
976 except errors.NotFoundError as err:
--> 977 raise FileNotFoundError(
978 str(err) + "\n You may be trying to load on a different device "
979 "from the computational device. Consider setting the "
FileNotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for ram://1ea4479d-6a25-4562-965a-428f7eb33342/variables/variables
You may be trying to load on a different device from the computational device. Consider setting the `experimental_io_device` option in `tf.saved_model.LoadOptions` to the io_device such as '/job:localhost'.

NotFoundError while using pickle dump to save a model

I have created a model named 'model', but when I try to save it using pickle it just gives a NotFoundError.
import pickle

with open("test.pkl", "wb") as file:
    pickle.dump(model, file)
This is the error message I get upon running the code:
INFO:tensorflow:Assets written to: ram://471dfd58-f3fe-4d9f-9075-60a1568cc629/assets
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
<ipython-input-47-0abd122520e5> in <module>
1 import pickle
2 with open("test.pkl","wb") as file:
----> 3 pickle.dump(model, file)
~\anaconda3\lib\site-packages\keras\engine\training.py in __reduce__(self)
313 if self.built:
314 return (pickle_utils.deserialize_model_from_bytecode,
--> 315 pickle_utils.serialize_model_as_bytecode(self))
316 else:
317 # SavedModel (and hence serialize_model_as_bytecode) only support
~\anaconda3\lib\site-packages\keras\saving\pickle_utils.py in serialize_model_as_bytecode(model)
75 with tf.io.gfile.GFile(dest_path, "rb") as f:
76 info = tarfile.TarInfo(name=os.path.relpath(dest_path, temp_dir))
---> 77 info.size = f.size()
78 archive.addfile(tarinfo=info, fileobj=f)
79 tf.io.gfile.rmtree(temp_dir)
~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py in size(self)
97 def size(self):
98 """Returns the size of the file."""
---> 99 return stat(self.__name).length
100
101 def write(self, file_content):
~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py in stat(filename)
908 errors.OpError: If the operation fails.
909 """
--> 910 return stat_v2(filename)
911
912
~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py in stat_v2(path)
924 errors.OpError: If the operation fails.
925 """
--> 926 return _pywrap_file_io.Stat(compat.path_to_str(path))
927
928
NotFoundError:
Any help would be massively appreciated.
Not a solution, but hopefully it still helps.
I had the same problem and the same error. I ended up avoiding it by using Keras's save and load methods instead of pickle. I don't know what your model is, but you might want to try the same; it might be due to what is picklable. Maybe the answer given to this question helps; it argues for saving the TensorFlow objects separately.
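A minimal sketch of that workaround, assuming model is the built tf.keras model from the question (the file name is a placeholder):

import tensorflow as tf

# Save using Keras's native format instead of pickling the model object
model.save('test_model.h5')

# Reload later without pickle involved
model = tf.keras.models.load_model('test_model.h5')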

Can't find model 'en'. It looks like you're trying to load a model from a shortcut, which is obsolete as of spaCy v3.0

I was trying to run:
text_field = Field(tokenize='spacy', lower=True, include_lengths=True, batch_first=True)
However, it fails with the following error:
OSError Traceback (most recent call last)
<ipython-input-72-1ac550316aec> in <module>
2
3 label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
----> 4 text_field = Field(tokenize='spacy', lower=True, include_lengths=True, batch_first=True)
5 fields = [('label', label_field), ('title', text_field), ('text', text_field), ('titletext', text_field)]
6
~/opt/anaconda3/lib/python3.8/site-packages/torchtext/data/field.py in __init__(self, sequential, use_vocab, init_token, eos_token, fix_length, dtype, preprocessing, postprocessing, lower, tokenize, tokenizer_language, include_lengths, batch_first, pad_token, unk_token, pad_first, truncate_first, stop_words, is_target)
161 # in case the tokenizer isn't picklable (e.g. spacy)
162 self.tokenizer_args = (tokenize, tokenizer_language)
--> 163 self.tokenize = get_tokenizer(tokenize, tokenizer_language)
164 self.include_lengths = include_lengths
165 self.batch_first = batch_first
~/opt/anaconda3/lib/python3.8/site-packages/torchtext/data/utils.py in get_tokenizer(tokenizer, language)
112 try:
113 import spacy
--> 114 spacy = spacy.load(language)
115 return partial(_spacy_tokenize, spacy=spacy)
116 except ImportError:
~/opt/anaconda3/lib/python3.8/site-packages/spacy/__init__.py in load(name, vocab, disable, exclude, config)
49 RETURNS (Language): The loaded nlp object.
50 """
---> 51 return util.load_model(
52 name, vocab=vocab, disable=disable, exclude=exclude, config=config
53 )
~/opt/anaconda3/lib/python3.8/site-packages/spacy/util.py in load_model(name, vocab, disable, exclude, config)
424 return load_model_from_path(name, **kwargs) # type: ignore[arg-type]
425 if name in OLD_MODEL_SHORTCUTS:
--> 426 raise IOError(Errors.E941.format(name=name, full=OLD_MODEL_SHORTCUTS[name])) # type: ignore[index]
427 raise IOError(Errors.E050.format(name=name))
428
OSError: [E941] Can't find model 'en'. It looks like you're trying to load a model from a shortcut, which is obsolete as of spaCy v3.0. To load the model, use its full name instead:
nlp = spacy.load("en_core_web_sm")
For more details on the available models, see the models directory: https://spacy.io/models. If you want to create a blank model, use spacy.blank: nlp = spacy.blank("en")
I even tried python -m spacy download en, but it still doesn't run in my Jupyter notebook. Does anyone know how to fix this problem?
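The error message itself points at the fix: install the model under its full name and pass that name through to spaCy. A minimal sketch using the tokenizer_language parameter visible in the Field signature above (details may vary by torchtext version):

# First, in a shell: python -m spacy download en_core_web_sm
from torchtext.data import Field

# Passing the full model name makes torchtext call spacy.load('en_core_web_sm')
# instead of the obsolete shortcut spacy.load('en')
text_field = Field(tokenize='spacy', tokenizer_language='en_core_web_sm',
                   lower=True, include_lengths=True, batch_first=True)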

NotFoundError: encountered while using function tf.train.latest_checkpoint()

I built a CNN classification model and saved the checkpoints while training. After running this code:
checkpoint_dir = "/home/user/cnn-model/trained_model_1506946529/"
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
I get the error:
NotFoundError Traceback (most recent call last)
<ipython-input-60-8de4d687f60c> in <module>()
5 checkpoint_dir += '/'
6 print (checkpoint_dir + 'checkpoints')
----> 7 checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
8 print (checkpoint_file)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py in latest_checkpoint(checkpoint_dir, latest_filename)
1612 v1_path = _prefix_to_checkpoint_path(ckpt.model_checkpoint_path,
1613 saver_pb2.SaverDef.V1)
-> 1614 if file_io.get_matching_files(v2_path) or file_io.get_matching_files(
1615 v1_path):
1616 return ckpt.model_checkpoint_path
/usr/local/lib/python3.5/dist-packages/tensorflow/python/lib/io/file_io.py in get_matching_files(filename)
330 # Convert the filenames to string from bytes.
331 compat.as_str_any(matching_filename)
--> 332 for single_filename in filename
333 for matching_filename in pywrap_tensorflow.GetMatchingFiles(
334 compat.as_bytes(single_filename), status)
/usr/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback)
64 if type is None:
65 try:
---> 66 next(self.gen)
67 except StopIteration:
68 return
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
464 None, None,
465 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466 pywrap_tensorflow.TF_GetCode(status))
467 finally:
468 pywrap_tensorflow.TF_DeleteStatus(status)
NotFoundError: /home/user/cnn-model/trained_model_1506946529/checkpoints
The file location exists and so do the checkpoints; what can I do to mitigate it?
tf.train.latest_checkpoint takes a folder name as its argument. Just change it to:
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
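For example, with the path from the question (assuming the checkpoint files and their index were written into that directory):

import tensorflow as tf

checkpoint_dir = "/home/user/cnn-model/trained_model_1506946529/"

# latest_checkpoint reads the `checkpoint` index file in the directory
# and returns the newest checkpoint prefix (or None if none is recorded)
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
print(checkpoint_file)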

Error in NLTK file

I have installed Anaconda3-4.2.0 for Windows (64-bit) and nltk-3.2.1. While I am running the following code in a Jupyter notebook:
para = "Hello World. It's good to see you. Thanks for buying this book."
import nltk.data
tokenizer = nltk.data.load('tokenizers/punkt/PY3/english.pickle')
tokenizer.tokenize(para)
I am getting the following error:
OSError Traceback (most recent call last)
<ipython-input-1-a87e01558cc4> in <module>()
1 para = "Hello World. It's good to see you. Thanks for buying this book."
2 import nltk.data
----> 3 tokenizer = nltk.data.load('tokenizers/punkt/PY3/english.pickle')
4 tokenizer.tokenize(para)
C:\Anaconda3\lib\site-packages\nltk\data.py in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
799
800 # Load the resource.
--> 801 opened_resource = _open(resource_url)
802
803 if format == 'raw':
C:\Anaconda3\lib\site-packages\nltk\data.py in _open(resource_url)
917
918 if protocol is None or protocol.lower() == 'nltk':
--> 919 return find(path_, path + ['']).open()
920 elif protocol.lower() == 'file':
921 # urllib might not use mode='rb', so handle this one ourselves:
C:\Anaconda3\lib\site-packages\nltk\data.py in find(resource_name, paths)
607 return GzipFileSystemPathPointer(p)
608 else:
--> 609 return FileSystemPathPointer(p)
610 else:
611 p = os.path.join(path_, url2pathname(zipfile))
C:\Anaconda3\lib\site-packages\nltk\compat.py in _decorator(*args, **kwargs)
559 def _decorator(*args, **kwargs):
560 args = (args[0], add_py3_data(args[1])) + args[2:]
--> 561 return init_func(*args, **kwargs)
562 return wraps(init_func)(_decorator)
563
C:\Anaconda3\lib\site-packages\nltk\data.py in __init__(self, _path)
298 _path = os.path.abspath(_path)
299 if not os.path.exists(_path):
--> 300 raise IOError('No such file or directory: %r' % _path)
301 self._path = _path
302
OSError: No such file or directory: 'C:\\nltk_data\\tokenizers\\punkt\\PY3\\PY3\\english.pickle'
I have downloaded the Punkt tokenizer in NLTK. Why am I seeing this error? Please give me an answer.
It seems the tokenizers/punkt/PY3/english.pickle file does not exist; you need to check that it is there.
NLTK can download the pickle file using its download function:
import nltk
nltk.download()
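If only the sentence tokenizer is needed, a targeted download is enough. Note that the error path contains PY3 twice because NLTK's add_py3_data decorator (visible in the traceback) inserts that segment itself, so the resource can likely be referenced without it:

import nltk

# One-time download of the Punkt sentence tokenizer models
nltk.download('punkt')

# Omit the PY3 segment; NLTK adds it automatically on Python 3
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
print(tokenizer.tokenize("Hello World. It's good to see you."))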
