Loading an .h5 file into PyTorch - python

I just want to load an .h5 file exported from PyTorch back into PyTorch.
Here's my code:
import torch
loaded_model = torch.load('/Users/me/tmp_model.h5')
Which spits out the following error:
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-6-bfb05a2f6d1e> in <module>
1 import torch
----> 2 loaded_model = torch.load('/Users/arielelkin/tmp_model.h5')
~/.pyenv/versions/3.8.6/lib/python3.8/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
605 opened_file.seek(orig_position)
606 return torch.jit.load(opened_file)
--> 607 return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
608 return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
609
~/.pyenv/versions/3.8.6/lib/python3.8/site-packages/torch/serialization.py in _load(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)
880 unpickler = UnpicklerWrapper(data_file, **pickle_load_args)
881 unpickler.persistent_load = persistent_load
--> 882 result = unpickler.load()
883
884 torch._utils._validate_loaded_sparse_tensors()
~/.pyenv/versions/3.8.6/lib/python3.8/site-packages/torch/serialization.py in find_class(self, mod_name, name)
873 def find_class(self, mod_name, name):
874 mod_name = load_module_mapping.get(mod_name, mod_name)
--> 875 return super().find_class(mod_name, name)
876
877 # Load the data (which may in turn use `persistent_load` to load tensors)
ModuleNotFoundError: No module named 'model'
I know that the model was exported thus:
ckpoint = 'version_131/epoch=171-step=1375.ckpt'
model = NSNetModel.load_from_checkpoint(Path('/Users/dev/Documents/models/'+ckpoint))
torch.save(model, 'tmp_model.h5')
What's the issue here? Am I missing an import?
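A likely cause, judging only from what the question shows: torch.save(model, ...) pickles the whole model object, so torch.load has to import the model's class from the same module path it had at save time (here, a module literally named model). Either make that module importable again on the loading side, or, more portably, save and load a state_dict instead. Below is a minimal sketch of the state_dict route, assuming NSNetModel can be imported in both environments; the mymodels import path and the no-argument constructor are illustrative assumptions.
import torch
from pathlib import Path
from mymodels import NSNetModel  # hypothetical import; point it at wherever NSNetModel is defined

# Export side: save only the weights, not the pickled model object.
ckpoint = 'version_131/epoch=171-step=1375.ckpt'
model = NSNetModel.load_from_checkpoint(Path('/Users/dev/Documents/models/' + ckpoint))
torch.save(model.state_dict(), 'tmp_model.pt')

# Import side: rebuild the model from its class, then load the weights.
loaded_model = NSNetModel()  # construct with the same hyperparameters used for training
loaded_model.load_state_dict(torch.load('tmp_model.pt'))
loaded_model.eval()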

Related

Getting an error when trying to print a class definition with inspect.getsource()

I am defining a class:
class MyFirstClass:
pass
Then I try to print the definition of the MyFirstClass class:
import inspect
print(inspect.getsource(MyFirstClass))
But I get this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22132/2338486789.py in <module>
1 import inspect
----> 2 print(inspect.getsource(MyFirstClass))
C:\ProgramData\Anaconda3\lib\inspect.py in getsource(object)
971 or code object. The source code is returned as a single string. An
972 OSError is raised if the source code cannot be retrieved."""
--> 973 lines, lnum = getsourcelines(object)
974 return ''.join(lines)
975
C:\ProgramData\Anaconda3\lib\inspect.py in getsourcelines(object)
953 raised if the source code cannot be retrieved."""
954 object = unwrap(object)
--> 955 lines, lnum = findsource(object)
956
957 if istraceback(object):
C:\ProgramData\Anaconda3\lib\inspect.py in findsource(object)
766 is raised if the source code cannot be retrieved."""
767
--> 768 file = getsourcefile(object)
769 if file:
770 # Invalidate cache if needed.
C:\ProgramData\Anaconda3\lib\inspect.py in getsourcefile(object)
682 Return None if no way can be identified to get the source.
683 """
--> 684 filename = getfile(object)
685 all_bytecode_suffixes = importlib.machinery.DEBUG_BYTECODE_SUFFIXES[:]
686 all_bytecode_suffixes += importlib.machinery.OPTIMIZED_BYTECODE_SUFFIXES[:]
C:\ProgramData\Anaconda3\lib\inspect.py in getfile(object)
651 if getattr(module, '__file__', None):
652 return module.__file__
--> 653 raise TypeError('{!r} is a built-in class'.format(object))
654 if ismethod(object):
655 object = object.__func__
TypeError: <class '__main__.MyFirstClass'> is a built-in class
The output I expected is:
class MyFirstClass:
pass
How do I correctly use inspect.getsource() to get my expected output (the definition of the MyFirstClass class)?
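One common workaround, sketched below, is to put the class in a .py file and import it: inspect.getsource needs a source file to read from, and a class defined directly in the interactive session may have no retrievable source file. The file name my_classes.py is just an illustration.
# my_classes.py
class MyFirstClass:
    pass

# in the notebook / interpreter
import inspect
from my_classes import MyFirstClass

print(inspect.getsource(MyFirstClass))
# prints:
# class MyFirstClass:
#     pass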

BertForTokenClassification not loading

I tried to load a BERT model from a local directory and it shows an error.
I am using CUDA 10.0 and PyTorch 1.6.0.
Code to load the model:
output_dir = './ner_model/'
model = BertForTokenClassification.from_pretrained(output_dir)
tokenizer = BertTokenizer.from_pretrained(output_dir)
model.to(device)
Any help would be appreciated.
ReadError: invalid header
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
~\anaconda3\envs\env\lib\site-packages\transformers\modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
511 try:
--> 512 state_dict = torch.load(resolved_archive_file, map_location="cpu")
513 except Exception:
~\anaconda3\envs\env\lib\site-packages\torch\serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
385 try:
--> 386 return _load(f, map_location, pickle_module, **pickle_load_args)
387 finally:
~\anaconda3\envs\env\lib\site-packages\torch\serialization.py in _load(f, map_location, pickle_module, **pickle_load_args)
558 # .zip is used for torch.jit.save and will throw an un-pickling error here
--> 559 raise RuntimeError("{} is a zip archive (did you mean to use torch.jit.load()?)".format(f.name))
560 # if not a tarfile, reset file offset and proceed
RuntimeError: ./ner_model/pytorch_model.bin is a zip archive (did you mean to use torch.jit.load()?)
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-13-770da388c2c8> in <module>
23
24 output_dir = './ner_model/'
---> 25 model = BertForTokenClassification.from_pretrained(output_dir)
26 tokenizer = BertTokenizer.from_pretrained(output_dir)
27 model.to(device)
~\anaconda3\envs\env\lib\site-packages\transformers\modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
513 except Exception:
514 raise OSError(
--> 515 "Unable to load weights from pytorch checkpoint file. "
516 "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True. "
517 )
OSError: Unable to load weights from pytorch checkpoint file. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.
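A likely cause, judging from the traceback rather than anything stated in the question: pytorch_model.bin was saved in the zip-based format introduced in PyTorch 1.6, while the environment doing the loading is running an older PyTorch (the serialization.py line numbers shown match pre-1.6 code). A sketch of the two usual fixes, assuming you also have access to a PyTorch >= 1.6 environment:
# Option 1: upgrade PyTorch in the environment that does the loading:
#   pip install -U torch
# Option 2: from a PyTorch >= 1.6 environment, re-save the weights in the legacy format:
import torch

state_dict = torch.load('./ner_model/pytorch_model.bin', map_location='cpu')
torch.save(state_dict, './ner_model/pytorch_model.bin',
           _use_new_zipfile_serialization=False)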

I'm having a problem trying to load a PyTorch model: "Can't find Identity in module"

When trying to load a PyTorch model, it gives the following attribute error:
model = torch.load('../input/melanoma-model/melanoma_model_0.pth')
model = model.to(device)
model.eval()
AttributeError Traceback (most recent call last)
1 arch = EfficientNet.from_pretrained('efficientnet-b2')
2 model = Net(arch=arch)
----> 3 torch.load('../input/melanoma-model/melanoma_model_0.pth')
4 model = model.to(device)
5 model.eval()
/opt/conda/lib/python3.7/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
591 return torch.jit.load(f)
592 return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
--> 593 return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
594
595
/opt/conda/lib/python3.7/site-packages/torch/serialization.py in _legacy_load(f, map_location, pickle_module, **pickle_load_args)
771 unpickler = pickle_module.Unpickler(f, **pickle_load_args)
772 unpickler.persistent_load = persistent_load
--> 773 result = unpickler.load()
774
775 deserialized_storage_keys = pickle_module.load(f, **pickle_load_args)
AttributeError: Can't get attribute 'Identity' on <module 'efficientnet_pytorch.utils' from '/opt/conda/lib/python3.7/site-packages/efficientnet_pytorch/utils.py'>
First, you need a model class to load the parameters from the .pth file into, and you are missing one step:
model = Model() # your model class (it probably has a different name)
model.load_state_dict(torch.load('../input/melanoma-model/melanoma_model_0.pth'))
model = model.to(device)
model.eval()
There you go, I hope that solved your problem!

Cannot load a pickle file using sklearn's joblib

I trained a model on a cluster, downloaded it (in .pkl format), and tried to load it locally. I know that sklearn's version of joblib was used to save the model mymodel.pkl (but I don't know exactly which version...).
from sklearn.externals import joblib
print(joblib.__version__)
model = joblib.load("mymodel.pkl")
Locally, I am using version 0.13.0 of sklearn's joblib.
This is the error I got:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-100-d0a3c42e5c53> in <module>
3 print(joblib.__version__)
4
----> 5 model = joblib.load("mymodel.pkl")
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\numpy_pickle.py in load(filename, mmap_mode)
596 return load_compatibility(fobj)
597
--> 598 obj = _unpickle(fobj, filename, mmap_mode)
599
600 return obj
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
524 obj = None
525 try:
--> 526 obj = unpickler.load()
527 if unpickler.compat_mode:
528 warnings.warn("The file '%s' has been generated with a "
~\AppData\Local\Continuum\anaconda3\lib\pickle.py in load(self)
1083 raise EOFError
1084 assert isinstance(key, bytes_types)
-> 1085 dispatch[key[0]](self)
1086 except _Stop as stopinst:
1087 return stopinst.value
KeyError: 239
Update:
I also tried the following, but got the error AttributeError: 'str' object has no attribute 'readable':
with io.BufferedReader("mymodel.pkl") as pickle_file:
    model = pickle.load(pickle_file)
You tried to dump it with joblib.dump('pipeline', 'mymodel.pkl'). This only dumped the string 'pipeline', not your actual pipeline object.
Dump it correctly with:
joblib.dump(pipeline,'mymodel.pkl')
...then read back with:
model = joblib.load('mymodel.pkl')
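As for the update in the question: that AttributeError happens because io.BufferedReader wraps an already-open binary stream, not a path string. If you want to try plain pickle, opening the file first would look like the sketch below (it will still fail if the file really contains only the string 'pipeline'):
import pickle

# Open the file in binary mode and let pickle read from the file object.
with open('mymodel.pkl', 'rb') as pickle_file:
    model = pickle.load(pickle_file)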

Node2vec and networkx

I am attempting to run node2vec on a directed networkx network I have created. The network looks like this:
OutEdgeDataView([(7, 1, {'senderId': 7, 'weight': 273}), (7, 8, {'senderId': 7, 'weight': 319}), (7, 9, {'senderId': 7, 'weight': 137})....
Each node has an integer ID, and each edge has a weight linking one node to another.
I am trying to use the node2vec module on this network as follows:
from node2vec import Node2Vec
node2vec = Node2Vec(mail_n_basic, dimensions=64, walk_length=30, num_walks=200, workers=4)
And I get the following error; any help explaining it would be much appreciated:
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\Andrew\Anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py", line 398, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File "C:\Users\Andrew\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 561, in __call__
return self.func(*args, **kwargs)
File "C:\Users\Andrew\Anaconda3\lib\site-packages\joblib\parallel.py", line 224, in __call__
for func, args, kwargs in self.items]
File "C:\Users\Andrew\Anaconda3\lib\site-packages\joblib\parallel.py", line 224, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Users\Andrew\Anaconda3\lib\site-packages\node2vec\node2vec.py", line 51, in parallel_generate_walks
walk_to = np.random.choice(walk_options, size=1)[0]
File "mtrand.pyx", line 1126, in mtrand.RandomState.choice
ValueError: a must be non-empty
"""
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-58-3ac160061528> in <module>()
1
----> 2 node2vec = Node2Vec(mail_n_basic, dimensions=64, walk_length=30, num_walks=200, workers=4)
~\Anaconda3\lib\site-packages\node2vec\node2vec.py in __init__(self, graph, dimensions, walk_length, num_walks, p, q, weight_key, workers, sampling_strategy)
111
112 self.d_graph = self._precompute_probabilities()
--> 113 self.walks = self._generate_walks()
114
115 def _precompute_probabilities(self):
~\Anaconda3\lib\site-packages\node2vec\node2vec.py in _generate_walks(self)
178 self.NEIGHBORS_KEY,
179 self.PROBABILITIES_KEY) for idx, num_walks
--> 180 in enumerate(num_walks_lists, 1))
181
182 walks = flatten(walk_results)
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
960
961 with self._backend.retrieval_context():
--> 962 self.retrieve()
963 # Make sure that we get a last message telling us we are done
964 elapsed_time = time.time() - self._start_time
~\Anaconda3\lib\site-packages\joblib\parallel.py in retrieve(self)
863 try:
864 if getattr(self._backend, 'supports_timeout', False):
--> 865 self._output.extend(job.get(timeout=self.timeout))
866 else:
867 self._output.extend(job.get())
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
513 AsyncResults.get from multiprocessing."""
514 try:
--> 515 return future.result(timeout=timeout)
516 except LokyTimeoutError:
517 raise TimeoutError()
~\Anaconda3\lib\site-packages\joblib\externals\loky\_base.py in result(self, timeout)
429 raise CancelledError()
430 elif self._state == FINISHED:
--> 431 return self.__get_result()
432 else:
433 raise TimeoutError()
~\Anaconda3\lib\site-packages\joblib\externals\loky\_base.py in __get_result(self)
380 def __get_result(self):
381 if self._exception:
--> 382 raise self._exception
383 else:
384 return self._result
ValueError: a must be non-empty
I'm the author of this library.
If you are using Windows, parallel execution won't work because of issues between joblib and Windows.
Run the same code with the updated version (pip install -U node2vec) and, when constructing the Node2Vec class, pass workers=1.
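Following that answer, the call on Windows would look something like this (mail_n_basic is the graph from the question):
# pip install -U node2vec
from node2vec import Node2Vec

# workers=1 avoids joblib's process-based parallelism, which is what fails on Windows.
node2vec = Node2Vec(mail_n_basic, dimensions=64, walk_length=30, num_walks=200, workers=1)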
