How to call the get_data function in this code - Python

I am new to working with Python and I want to run this code:
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

class Preprocess():
    def __init__(self, config, d_name):
        self.root_dir = config.root_dir
        self.batch_size = config.batch_size
        self.cora = Planetoid(root='./data/cora', name='Cora')
        #self.citeseer = Planetoid(root='./data/citeseer', name='CiteSeer')
        #self.pubmed = Planetoid(root='./data/pubmed', name='PubMed')
        self.num_classes, self.num_node_features, self.data = self.get_data(d_name)

    def get_data(self, d_name):
        '''
        d_name = 'Cora', 'CiteSeer', 'PubMed'
        '''
        dataset = Planetoid(root=self.root_dir + d_name, name=d_name)
        return dataset.num_classes, dataset.num_node_features, DataLoader(dataset, batch_size=self.batch_size)

f = Preprocess(config, Cora)
f.get_data(Cora)
but I get this error: name 'config' is not defined

So, it is not a bug.
Look at your constructor:
def __init__(self, config, d_name):
    self.root_dir = config.root_dir
    self.batch_size = config.batch_size
    # etc...
And your instantiation:
f = Preprocess(config, Cora)
f.get_data(Cora)
Note that you are passing a variable "config" that has not been declared anywhere. Also, given how "config" is used in the constructor, you need to pass an object to the instantiation "f = Preprocess(args)", because "config" must have a "root_dir" attribute (and a "batch_size" attribute). The same applies to Cora, which should be the string 'Cora'.
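For example, a minimal sketch using types.SimpleNamespace (the root_dir and batch_size values here are assumptions, not from the original post):

from types import SimpleNamespace

# Hypothetical config: any object exposing root_dir and batch_size works.
config = SimpleNamespace(root_dir='./data/', batch_size=32)

# Dataset names are strings such as 'Cora', 'CiteSeer', 'PubMed'.
f = Preprocess(config, 'Cora')
num_classes, num_node_features, loader = f.get_data('Cora')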
Please check the PyTorch Geometric documentation for more examples of how to use this framework. Don't give up, you can do it.

Related

Change the imported libraries of a saved pickle

Some time ago I saved a class instance to a pickle file:
<library.module.Class at 0x1c926b2e520>
That class imports two libraries that have since changed their names.
Is it possible to edit the pickle file so I can update those two imports without regenerating the pickle all over again?
Thank you!
Regards
EDIT:
This is how I am loading the pickle:
import os
import pickle

model_path = os.getenv('MODELS_FOLDER') + 'model_20210130.pkl'
model = pickle.load(open(model_path, 'rb'))
This is the content of the pickled class. The two imports I want to update are marked.
import socceraction.spadl.config as spadlconfig   # <- import to update
from socceraction.spadl.base import SPADLSchema   # <- import to update

class ExpectedThreat:
    """An implementation of the model."""

    def __init__(self):
        ...

    def __solve(self) -> None:
        ...

    def fit(self, actions: DataFrame[SPADLSchema]) -> 'ExpectedThreat':
        """Fits the xT model with the given actions."""

    def predict(
        self, actions: DataFrame[SPADLSchema], use_interpolation: bool = False
    ) -> np.ndarray:
        """Predicts the model values for the given actions."""
I don't think you can do that.
Pickled objects are serialized content; to be modified, they must first be properly deserialized.
Why can't you just load it, change it, and overwrite it?
You can still overwrite methods with new ones:
import os
import pickle
import types

# import new libraries
import new_socceraction.spadl.config as new_spadlconfig
from new_socceraction.spadl.base import new_SPADLSchema
# assuming pandera's typed DataFrame, as used by the original class
from pandera.typing import DataFrame
import numpy as np

model_path = os.getenv('MODELS_FOLDER') + 'model_20210130.pkl'
with open(model_path, 'rb') as file:
    model = pickle.load(file)

# define new methods that use the new libraries
def fit(self, actions: DataFrame[new_SPADLSchema]) -> 'ExpectedThreat':
    """Fits the xT model with the given actions."""
    pass  # your new implementation

def predict(
    self, actions: DataFrame[new_SPADLSchema], use_interpolation: bool = False
) -> np.ndarray:
    """Predicts the model values for the given actions."""
    pass  # your new implementation

# overwrite the old methods; bind them so `self` is passed automatically
model.fit = types.MethodType(fit, model)
model.predict = types.MethodType(predict, model)

# save it again
with open(model_path, 'wb') as file:
    pickle.dump(model, file)
...
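If loading itself fails because the old module names no longer exist, one technique not shown above is a custom Unpickler that remaps module paths during deserialization. A sketch, assuming the package was renamed from socceraction to new_socceraction:

import pickle

class RenamingUnpickler(pickle.Unpickler):
    # Remap old module paths recorded in the pickle to the new names.
    def find_class(self, module, name):
        if module.startswith('socceraction'):
            module = module.replace('socceraction', 'new_socceraction', 1)
        return super().find_class(module, name)

with open(model_path, 'rb') as file:
    model = RenamingUnpickler(file).load()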

Is there a way to pickle a custom tensorflow.keras metric?

I defined the following custom metric to train my model in tensorflow:
import tensorflow as tf
from tensorflow import keras as ks

N_CLASSES = 15

class MulticlassMeanIoU(tf.keras.metrics.MeanIoU):
    def __init__(self,
                 y_true=None,
                 y_pred=None,
                 num_classes=None,
                 name="Multi_MeanIoU",
                 dtype=None):
        super(MulticlassMeanIoU, self).__init__(num_classes=num_classes,
                                                name=name, dtype=dtype)
        self.__name__ = name

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "num_classes": self.num_classes}

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.math.argmax(y_pred, axis=-1)
        return super().update_state(y_true, y_pred, sample_weight)

met = MulticlassMeanIoU(num_classes=N_CLASSES)
After training, I save the model, and I also tried to save the custom metric object as follows:
import pickle

with open("/some/path/custom_metrics.pkl", "wb") as f:
    pickle.dump(met, f)
However, when I try to load the metric like this:
with open(path_custom_metrics, "rb") as f:
    met = pickle.load(f)
I always get some errors, e.g. AttributeError: 'MulticlassMeanIoU' object has no attribute 'update_state_fn'.
Now I wonder whether it is possible to pickle a custom metric at all, and if so, how? It would come in handy if I could save custom metrics with the model, so that when I load the model in another Python session, I always have the metric that is required to load the model in the first place. It would be possible to redefine the metric by pasting the full code into the other script before loading the model, but I think this would be bad style and could cause problems if I changed the metric in the training script and forgot to copy the code across.
If you need to pickle a metric, one possible solution is to implement the __getstate__() and __setstate__() methods. These two methods are called during (de)serialization, if they are available. Add them to your class and you will have what you need. I tried to make this as general as possible, so that it works for any Metric:
from typing import Any, Dict

def __getstate__(self):
    variables = {v.name: v.numpy() for v in self.variables}
    state = {
        name: variables[var.name]
        for name, var in self._unconditional_dependency_names.items()
        if isinstance(var, tf.Variable)}
    state['name'] = self.name
    state['num_classes'] = self.num_classes
    return state

def __setstate__(self, state: Dict[str, Any]):
    self.__init__(name=state.pop('name'), num_classes=state.pop('num_classes'))
    for name, value in state.items():
        self._unconditional_dependency_names[name].assign(value)
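With those two methods added to the class definition, saving and restoring the metric should work along these lines (a sketch; the path is taken from the question):

import pickle

met = MulticlassMeanIoU(num_classes=N_CLASSES)

with open("/some/path/custom_metrics.pkl", "wb") as f:
    pickle.dump(met, f)  # __getstate__ captures name, num_classes, and variables

with open("/some/path/custom_metrics.pkl", "rb") as f:
    met_restored = pickle.load(f)  # __setstate__ re-initializes and restores state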

Constructor __init__ is written to take two positional arguments, but when used, reports the error that only 1 is allowed

I have a class and constructor I'm working on. I added an extra parameter to __init__, and now I get the error TypeError: FeatureDataset() takes 1 positional argument but 2 were given.
I wonder why; it seems to me that it should accept two arguments. It's incomplete, but I'd like to get past this constructor argument count error. I have checked several answers, and they were either about something specifically different or about indentation, and I have neither of those issues (4 spaces per indentation level).
def FeatureDataset(Dataset):
    def __init__(self, root_dir, file_name):
        # load csv
        self.file_out = pd.read_csv(file_name)
        self.root_dir = root_dir
        self.labels = self.file_out.iloc[1:160, 0].values
        self.features = self.file_out.iloc[1:160, 1:].values

        # Feature Scaling
        sc = StandardScaler()
        label_train = self.labels
        feature_train = self.features  # sc.fit_transform(features)

        # Convert to torch tensors
        self.feature_train = torch.tensor(label_train, dtype=torch.float32)
        self.label_train = torch.tensor(label_train)

file_name = "data.csv"
root_dir = "archive"
feature_set = FeatureDataset(root_dir, file_name)
This defines a function, not a class:
def FeatureDataset(Dataset):
Try this instead:
class FeatureDataset(Dataset):
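The TypeError arises because def FeatureDataset(Dataset): declares a plain function with a single parameter, so calling FeatureDataset(root_dir, file_name) passes two arguments to a one-argument function. A corrected sketch, assuming Dataset is torch.utils.data.Dataset (not confirmed in the question):

import pandas as pd
import torch
from torch.utils.data import Dataset

class FeatureDataset(Dataset):  # class, not def
    def __init__(self, root_dir, file_name):
        self.root_dir = root_dir
        self.file_out = pd.read_csv(file_name)
        self.labels = self.file_out.iloc[1:160, 0].values
        self.features = self.file_out.iloc[1:160, 1:].values
        # The question's code tensorized label_train twice; presumably the
        # features belong in feature_train.
        self.feature_train = torch.tensor(self.features, dtype=torch.float32)
        self.label_train = torch.tensor(self.labels)

feature_set = FeatureDataset("archive", "data.csv")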

predict() got an unexpected keyword argument 'stats'

I am trying to get predictions from a TensorFlow custom routine served on AI Platform.
I have managed to serve it with the following settings: --runtime-version 2.3 --python-version 3.7 --machine-type mls1-c4-m2
But I keep getting this error when I try to make any predictions:
ERROR:root:Prediction failed: predict() got an unexpected keyword argument 'stats'
ERROR:root:Prediction failed: unknown error.
The routine has two steps:
Take the input (a string) and transform it into an embedding, using a bow model in .pkl format.
Use the embeddings to get predictions from a Keras model saved as an .h5 file.
This is my setup.py:
from setuptools import setup, find_packages

REQUIRED_PACKAGES = ['Keras==2.3.1', 'sklearn==0.0', 'h5py<3.0.0', 'numpy==1.16.0', 'scipy==1.4.1', 'pyyaml==5.2']

setup(
    name='my_custom_code',
    version='0.1',
    scripts=['predictor.py'],
    install_requires=REQUIRED_PACKAGES,
    packages=find_packages(),
    include_package_data=False,
    description=''
)
And this is my predictor.py:
import os
import pickle
import tensorflow as tf
import numpy as np

class MyPredictor(object):
    def __init__(self, model, bow_model):
        self._model = model
        self._bow_model = bow_model

    def predict(self, instances):
        outputs = []
        for x in instances:
            vector = self.embedding(x)
            output = self._model.predict(vector)
            outputs.append(output)
        return outputs

    def embedding(self, statement):
        vector = self._bow_model.transform(statement).toarray()
        vector = vector.tolist()
        return vector

    @classmethod
    def from_path(cls, model_dir):
        model_path = os.path.join(model_dir, 'model.h5')
        model = tf.keras.models.load_model(model_path, compile=False)
        preprocessor_path = os.path.join(model_dir, 'bow.pkl')
        with open(preprocessor_path, 'rb') as f:
            bow_model = pickle.load(f)
        return cls(model, bow_model)
The script I'm using for testing is:
import googleapiclient.discovery

instances = ['test', 'test']
service = googleapiclient.discovery.build('ml', 'v1')
name = 'projects/{}/models/{}/versions/{}'.format(PROJECT_ID, MODEL_NAME, VERSION_NAME)
response = service.projects().predict(
    name=name,
    body={'instances': instances}
).execute()
if 'error' in response:
    raise RuntimeError(response['error'])
else:
    print(response['predictions'])
According to the custom prediction routine documentation, when creating the predictor class, the predict() method must be defined with the arguments self, instances, **kwargs to properly handle the prediction request:
instances: A list of prediction input instances.
**kwargs: A dictionary of keyword args provided as additional fields on the predict request body.
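So the fix, sketched against the predictor above, is to accept the extra keyword arguments; the 'stats' field sent with the request then lands in kwargs:

def predict(self, instances, **kwargs):
    # Extra request fields such as 'stats' arrive via **kwargs and can be ignored.
    outputs = []
    for x in instances:
        vector = self.embedding(x)
        output = self._model.predict(vector)
        outputs.append(output)
    return outputs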

Can't get attribute 'MyScaler' on <module '__main__'>

I have a class in one of my notebooks, as below:
class MyScaler(BaseEstimator, TransformerMixin):
    def __init__(self, columns, with_mean=True, with_std=True, copy=True):
        self.scaler = StandardScaler(copy, with_mean, with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.array(np.mean(X[self.columns]))
        self.var_ = np.array(np.var(X[self.columns]))
        return self

    def transform(self, X, y=None, copy=None):
        initial_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[initial_col_order]
I am pickling this class as:
with open('Custom_Scaler', 'wb') as file:
    pickle.dump(MyScaler, file)
I have another module, 'LogReg_Absent_Module', where I am trying to un-pickle this file. I have also defined this class in that module, as follows:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin

# The custom scaler that only scales the non-dummy value columns.
class MyScaler(BaseEstimator, TransformerMixin):
    def __init__(self, columns, with_mean=True, with_std=True, copy=True):
        self.scaler = StandardScaler(copy, with_mean, with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.array(np.mean(X[self.columns]))
        self.var_ = np.array(np.var(X[self.columns]))
        return self

    def transform(self, X, y=None, copy=None):
        initial_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[initial_col_order]

# The class that we are going to use from here on to predict new data
class absenteeism_model():
    def __init__(self, model_file, scaler_file):
        with open('Absenteeism_Model', 'rb') as model_file, open('Custom_Scaler', 'rb') as scaler_file:
            self.log_reg = pickle.load(model_file)  # Load the previously saved model
            self.scaler = pickle.load(scaler_file)  # and scaler.
        self.data = None
From a new notebook, when I try model = absenteeism_model('Absenteeism_Model', 'Custom_Scaler')
I get:
<ipython-input-66-8631c175353f> in <module>
----> 1 model = absenteeism_model('Absenteeism_Model','Custom_Scaler')
~\LogReg_Absent_Module.py in __init__(self, model_file, scaler_file)
37 with open('Absenteeism_Model','rb') as model_file,open('Custom_Scaler','rb') as scaler_file:
38 self.log_reg = pickle.load(model_file) #Load the previously saved model
---> 39 self.scaler = pickle.load(scaler_file) #and scaler.
40 self.data = None
41
AttributeError: Can't get attribute 'MyScaler' on <module '__main__'>
I don't understand why you pickle the class and not an object of that class.
Would you mind showing the part of the code where you instantiate MyScaler? I think the issue is with the MyScaler object at the point where you save your scaler.
For example, if you have declared:
scaler = MyScaler(X)
then you would pickle it using the following code:
with open('Custom_Scaler', 'wb') as file:
    pickle.dump(scaler, file)
See if this solves your issue.
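A further note, not covered by the answers above: pickle records the module in which the object's class was defined at dump time. If the scaler was pickled from a notebook, the pickle references __main__.MyScaler, so unpickling from another context fails with exactly this error. One common workaround (a sketch, assuming the class lives in LogReg_Absent_Module as in the question) is to expose the class on __main__ before loading:

import __main__
import pickle
from LogReg_Absent_Module import MyScaler

# The pickle references __main__.MyScaler, so make the class visible there.
__main__.MyScaler = MyScaler

with open('Custom_Scaler', 'rb') as file:
    scaler = pickle.load(file)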
