I have created a model named 'model', but when I try to save it using pickle, it just gives a 'NotFoundError'.
import pickle
with open("test.pkl","wb") as file:
pickle.dump(model, file)
This is the error message I get upon running the code.
INFO:tensorflow:Assets written to: ram://471dfd58-f3fe-4d9f-9075-60a1568cc629/assets
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
<ipython-input-47-0abd122520e5> in <module>
1 import pickle
2 with open("test.pkl","wb") as file:
----> 3 pickle.dump(model, file)
~\anaconda3\lib\site-packages\keras\engine\training.py in __reduce__(self)
313 if self.built:
314 return (pickle_utils.deserialize_model_from_bytecode,
--> 315 pickle_utils.serialize_model_as_bytecode(self))
316 else:
317 # SavedModel (and hence serialize_model_as_bytecode) only support
~\anaconda3\lib\site-packages\keras\saving\pickle_utils.py in serialize_model_as_bytecode(model)
75 with tf.io.gfile.GFile(dest_path, "rb") as f:
76 info = tarfile.TarInfo(name=os.path.relpath(dest_path, temp_dir))
---> 77 info.size = f.size()
78 archive.addfile(tarinfo=info, fileobj=f)
79 tf.io.gfile.rmtree(temp_dir)
~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py in size(self)
97 def size(self):
98 """Returns the size of the file."""
---> 99 return stat(self.__name).length
100
101 def write(self, file_content):
~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py in stat(filename)
908 errors.OpError: If the operation fails.
909 """
--> 910 return stat_v2(filename)
911
912
~\anaconda3\lib\site-packages\tensorflow\python\lib\io\file_io.py in stat_v2(path)
924 errors.OpError: If the operation fails.
925 """
--> 926 return _pywrap_file_io.Stat(compat.path_to_str(path))
927
928
NotFoundError:
Any help would be massively appreciated.
Not a solution but hopefully it still helps.
I had the same problem and the same error. I ended up avoiding it by using the Keras save and load methods instead of pickle. I don't know what your model is, but you might want to try the same; it might be due to what is picklable. Maybe the answer given to this question helps; it argues for saving TensorFlow objects separately.
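For reference, here is a minimal sketch of that approach, assuming model is a built tf.keras model (the "my_model" path is just a placeholder):
import tensorflow as tf
# Save to the TensorFlow SavedModel format instead of pickling
model.save("my_model")
# Later, restore the model without pickle
model = tf.keras.models.load_model("my_model")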
I was trying to use the langchain library to create a question-answering system, but when I try to search the document using the chromadb library it gives this error:
TypeError: create_collection() got an unexpected keyword argument 'embedding_fn'
Here's the code I am working on:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.vectorstores import Chroma
loader = TextLoader('./info.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
The last line generates the error.
This is the complete error message:
TypeError Traceback (most recent call last)
Input In [36], in <cell line: 1>()
----> 1 docsearch = Chroma.from_documents(texts, embeddings)
File ~\anaconda3\lib\site-packages\langchain\vectorstores\chroma.py:212, in Chroma.from_documents(cls, documents, embedding, ids, collection_name, persist_directory, **kwargs)
210 texts = [doc.page_content for doc in documents]
211 metadatas = [doc.metadata for doc in documents]
--> 212 return cls.from_texts(
213 texts=texts,
214 embedding=embedding,
215 metadatas=metadatas,
216 ids=ids,
217 collection_name=collection_name,
218 persist_directory=persist_directory,
219 )
File ~\anaconda3\lib\site-packages\langchain\vectorstores\chroma.py:178, in Chroma.from_texts(cls, texts, embedding, metadatas, ids, collection_name, persist_directory, **kwargs)
151 @classmethod
152 def from_texts(
153 cls,
(...)
160 **kwargs: Any,
161 ) -> Chroma:
162 """Create a Chroma vectorstore from a raw documents.
163
164 If a persist_directory is specified, the collection will be persisted there.
(...)
176 Chroma: Chroma vectorstore.
177 """
--> 178 chroma_collection = cls(
179 collection_name=collection_name,
180 embedding_function=embedding,
181 persist_directory=persist_directory,
182 )
183 chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids)
184 return chroma_collection
File ~\anaconda3\lib\site-packages\langchain\vectorstores\chroma.py:65, in Chroma.__init__(self, collection_name, embedding_function, persist_directory)
60 logger.warning(
61 f"Collection {collection_name} already exists,"
62 " Do you have the right embedding function?"
63 )
64 else:
---> 65 self._collection = self._client.create_collection(
66 name=collection_name,
67 embedding_fn=self._embedding_function.embed_documents
68 if self._embedding_function is not None
69 else None,
70 )
TypeError: create_collection() got an unexpected keyword argument 'embedding_fn'
The create_collection method of chromadb.Client was changed 2 days ago and the embedding_fn parameter was renamed to embedding_function:
https://github.com/chroma-core/chroma/commit/6ce2388e219d47048e854be72be54617df647224
The source code for the langchain.vectorstores.chroma.Chroma class as of version 0.0.87 seems to have been updated already (3 hours before you asked the question) to match the chromadb library:
https://github.com/hwchase17/langchain/commit/34cba2da3264ccc9100f7efd16807c8d2a51734c
So you should be able to fix the problem by installing the newest version of LangChain.
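For example, with pip (version 0.0.87 or later should include the fix):
pip install --upgrade langchain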
I'm trying to run:
models.vgg16(pretrained=True)
to get a pretrained model for PyTorch, but I get the error below. This error was already asked about before, but the answers there aren't helping me. :/
I tried to upgrade tqdm and twine, but with no success.
Thanks.
Full log:
TqdmKeyError Traceback (most recent call last)
<ipython-input-13-f93ae0d83650> in <module>
2 from torchvision import transforms, datasets, models
3
----> 4 model = models.vgg16(pretrained=True)
5
6 # n_inputs = model.classifier[6].in_features
c:\users\user\appdata\local\programs\python\python37\lib\site-packages\torchvision\models\vgg.py in vgg16(pretrained, progress, **kwargs)
148 progress (bool): If True, displays a progress bar of the download to stderr
149 """
--> 150 return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs)
151
152
c:\users\user\appdata\local\programs\python\python37\lib\site-packages\torchvision\models\vgg.py in _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs)
91 if pretrained:
92 state_dict = load_state_dict_from_url(model_urls[arch],
---> 93 progress=progress)
94 model.load_state_dict(state_dict)
95 return model
c:\users\user\appdata\local\programs\python\python37\lib\site-packages\torch\hub.py in load_state_dict_from_url(url, model_dir, map_location, progress, check_hash, file_name)
553 r = HASH_REGEX.search(filename) # r is Optional[Match[str]]
554 hash_prefix = r.group(1) if r else None
--> 555 download_url_to_file(url, cached_file, hash_prefix, progress=progress)
556
557 if _is_legacy_zip_format(cached_file):
c:\users\user\appdata\local\programs\python\python37\lib\site-packages\torch\hub.py in download_url_to_file(url, dst, hash_prefix, progress)
443 sha256 = hashlib.sha256()
444 with tqdm(total=file_size, disable=not progress,
--> 445 unit='B', unit_scale=True, unit_divisor=1024) as pbar:
446 while True:
447 buffer = u.read(8192)
c:\users\user\appdata\local\programs\python\python37\lib\site-packages\tqdm\_tqdm.py in __init__(self, iterable, desc, total, leave, file, ncols, mininterval, maxinterval, miniters, ascii, disable, unit, unit_scale, dynamic_ncols, smoothing, bar_format, initial, position, postfix, gui, **kwargs)
TqdmKeyError: "Unknown argument(s): {'unit_divisor': 1024}"
Reinstalling and upgrading tqdm worked for me!
pip uninstall tqdm
pip install tqdm
What version of tqdm do you have? I upgraded to 4.63.1 and it worked.
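If you want to check the installed version before upgrading, a quick sketch:
import tqdm
# Old tqdm releases do not understand the unit_divisor argument, so make sure this is reasonably recent
print(tqdm.__version__)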
I am trying to load a pre-trained pipeline into my code like this:
nlp = de_core_news_sm.load()
nlp = nlp.from_disk('./TRAINED/Background/')
but I get a ValueError saying:
ValueError Traceback (most recent call last)
<ipython-input-4-1f41fefa6daa> in <module>
1 nlp = de_core_news_sm.load()
----> 2 nlp = nlp.from_disk('./TRAINED/Background/')
3 print(nlp)
/opt/anaconda3/lib/python3.8/site-packages/spacy/language.py in from_disk(self, path, exclude, disable)
972 # Convert to list here in case exclude is (default) tuple
973 exclude = list(exclude) + ["vocab"]
--> 974 util.from_disk(path, deserializers, exclude)
975 self._path = path
976 return self
/opt/anaconda3/lib/python3.8/site-packages/spacy/util.py in from_disk(path, readers, exclude)
688 # Split to support file names like meta.json
689 if key.split(".")[0] not in exclude:
--> 690 reader(path / key)
691 return path
692
/opt/anaconda3/lib/python3.8/site-packages/spacy/language.py in deserialize_vocab(path)
948 def deserialize_vocab(path):
949 if path.exists():
--> 950 self.vocab.from_disk(path)
951 _fix_pretrained_vectors_name(self)
952
vocab.pyx in spacy.vocab.Vocab.from_disk()
strings.pyx in spacy.strings.StringStore.from_disk()
/opt/anaconda3/lib/python3.8/site-packages/srsly/_json_api.py in read_json(location)
48 data = sys.stdin.read()
49 return ujson.loads(data)
---> 50 file_path = force_path(location)
51 with file_path.open("r", encoding="utf8") as f:
52 return ujson.load(f)
/opt/anaconda3/lib/python3.8/site-packages/srsly/util.py in force_path(location, require_exists)
19 location = Path(location)
20 if require_exists and not location.exists():
---> 21 raise ValueError("Can't read file: {}".format(location))
22 return location
23
ValueError: Can't read file: TRAINED/Background/vocab/strings.json
If I open the vocab folder on macOS, there's no strings.json file, just a few exec files. What can I do to properly load the model?
You need a directory structure like the one below in order to load a spaCy model. In your case, there is no strings.json file in the vocab directory, which is what throws the error.
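As a rough sketch, a spaCy v2 pipeline saved with nlp.to_disk() typically produces a layout like this (the exact component directories depend on your pipeline):
TRAINED/Background/
    meta.json
    tokenizer
    vocab/
        strings.json
        lexemes.bin
        key2row
        vectors
    tagger/
    parser/
    ner/
If you trained the model yourself, make sure it was written out with nlp.to_disk('./TRAINED/Background/') rather than copied over partially.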
I am following a tutorial here: https://towardsdatascience.com/multi-class-text-classification-model-comparison-and-selection-5eb066197568
I am at the part "Word2vec and Logistic Regression". I have downloaded the "GoogleNews-vectors-negative300.bin.gz" file and I am trying to apply it to my own text data. However, when I get to the following code:
%%time
import gensim  # needed for gensim.models.KeyedVectors below
from gensim.models import Word2Vec
wv = gensim.models.KeyedVectors.load_word2vec_format("/data/users/USERS/File_path/classifier/GoogleNews_Embedding/GoogleNews-vectors-negative300.bin.gz", binary=True)
wv.init_sims(replace=True)
I run into the following error:
/data/users/msmith/env/lib64/python3.6/site-packages/smart_open/smart_open_lib.py:398: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function
'See the migration notes for details: %s' % _MIGRATION_NOTES_URL
---------------------------------------------------------------------------
EOFError Traceback (most recent call last)
<timed exec> in <module>
~/env/lib64/python3.6/site-packages/gensim/models/keyedvectors.py in load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)
1492 return _load_word2vec_format(
1493 cls, fname, fvocab=fvocab, binary=binary, encoding=encoding, unicode_errors=unicode_errors,
-> 1494 limit=limit, datatype=datatype)
1495
1496 def get_keras_embedding(self, train_embeddings=False):
~/env/lib64/python3.6/site-packages/gensim/models/utils_any2vec.py in _load_word2vec_format(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)
383 with utils.ignore_deprecation_warning():
384 # TODO use frombuffer or something similar
--> 385 weights = fromstring(fin.read(binary_len), dtype=REAL).astype(datatype)
386 add_word(word, weights)
387 else:
/usr/lib64/python3.6/gzip.py in read(self, size)
274 import errno
275 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 276 return self._buffer.read(size)
277
278 def read1(self, size=-1):
/usr/lib64/python3.6/_compression.py in readinto(self, b)
66 def readinto(self, b):
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
70 return len(data)
/usr/lib64/python3.6/gzip.py in read(self, size)
480 break
481 if buf == b"":
--> 482 raise EOFError("Compressed file ended before the "
483 "end-of-stream marker was reached")
484
EOFError: Compressed file ended before the end-of-stream marker was reached
Any idea what's gone wrong / how to overcome this issue?
Thanks in advance!
I have an audio dataset of many wav files and tried to use librosa to edit them, but I have trouble reading certain files with librosa.load. Could someone help me figure it out?
Here is my code:
import librosa
sound_clip = librosa.load('audio/fold1/180937-7-3-10.wav')
print(sound_clip)
Here is the error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-5-93fe2f032e98> in <module>()
----> 1 sound_clip = librosa.load('audio/fold1/180937-7-3-10.wav')
2 print(sound_clip)
/home/uri7910/anaconda2/envs/tensorflow011/lib/python2.7/site-packages/librosa/core/audio.pyc in load(path, sr, mono, offset, duration, dtype)
107
108 y = []
--> 109 with audioread.audio_open(os.path.realpath(path)) as input_file:
110 sr_native = input_file.samplerate
111 n_channels = input_file.channels
/home/uri7910/anaconda2/envs/tensorflow011/lib/python2.7/site-packages/audioread/__init__.pyc in audio_open(path)
100 from . import maddec
101 try:
--> 102 return maddec.MadAudioFile(path)
103 except DecodeError:
104 pass
/home/uri7910/anaconda2/envs/tensorflow011/lib/python2.7/site-packages/audioread/maddec.pyc in __init__(self, filename)
24 def __init__(self, filename):
25 self.fp = open(filename, 'rb')
---> 26 self.mf = mad.MadFile(self.fp)
27 if not self.mf.total_time(): # Indicates a failed open.
28 raise UnsupportedError()
AttributeError: 'module' object has no attribute 'MadFile'
The failing line is:
self.mf = mad.MadFile(self.fp)
AttributeError: 'module' object has no attribute 'MadFile'
This looks to be a problem with the pyMad library. I would suggest upgrading or reinstalling that library. If that fails, you might want to raise a bug.
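A minimal sketch of the reinstall, assuming the mad module comes from the pymad package:
pip uninstall pymad
pip install pymad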