Can't get attribute 'MyScaler' on <module '__main__'> - python

I have a class in one of my notebooks, as below:
class MyScaler(BaseEstimator, TransformerMixin):
    def __init__(self, columns, with_mean=True, with_std=True, copy=True):
        self.scaler = StandardScaler(copy, with_mean, with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.array(np.mean(X[self.columns]))
        self.var_ = np.array(np.var(X[self.columns]))
        return self

    def transform(self, X, y=None, copy=None):
        initial_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[initial_col_order]
I am pickling this class as:
with open('Custom_Scaler', 'wb') as file:
    pickle.dump(MyScaler, file)
I have another module 'LogReg_Absent_Module' where I am trying to unpickle this file. I have also defined this class in that module as follows:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin
# The custom scaler that only scales the non-dummy value columns.
class MyScaler(BaseEstimator, TransformerMixin):
    def __init__(self, columns, with_mean=True, with_std=True, copy=True):
        self.scaler = StandardScaler(copy, with_mean, with_std)
        self.columns = columns
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.array(np.mean(X[self.columns]))
        self.var_ = np.array(np.var(X[self.columns]))
        return self

    def transform(self, X, y=None, copy=None):
        initial_col_order = X.columns
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        return pd.concat([X_not_scaled, X_scaled], axis=1)[initial_col_order]
# The class that we are going to use from here on to predict new data
class absenteeism_model():
    def __init__(self, model_file, scaler_file):
        with open('Absenteeism_Model', 'rb') as model_file, open('Custom_Scaler', 'rb') as scaler_file:
            self.log_reg = pickle.load(model_file)  # Load the previously saved model
            self.scaler = pickle.load(scaler_file)  # and scaler.
            self.data = None
From a new notebook, when I try model = absenteeism_model('Absenteeism_Model', 'Custom_Scaler')
I get:
<ipython-input-66-8631c175353f> in <module>
----> 1 model = absenteeism_model('Absenteeism_Model','Custom_Scaler')
~\LogReg_Absent_Module.py in __init__(self, model_file, scaler_file)
37 with open('Absenteeism_Model','rb') as model_file,open('Custom_Scaler','rb') as scaler_file:
38 self.log_reg = pickle.load(model_file) #Load the previously saved model
---> 39 self.scaler = pickle.load(scaler_file) #and scaler.
40 self.data = None
41
AttributeError: Can't get attribute 'MyScaler' on <module '__main__'>

I don't understand why you pickle the class itself and not an object of that class.

Do you mind showing the code of the part where you have implemented your MyScaler? I think the issue is with how you are saving your scaler: you should pickle an object of the MyScaler class, not the class itself.
For example, if you have declared:
scaler = MyScaler(X)
then you would pickle it using the following code:
with open('Custom_Scaler', 'wb') as file:
    pickle.dump(scaler, file)
See if this solves your issue.
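To make this concrete, here is a minimal sketch of the full round trip (the training frame and column names are made up for illustration). The key point is that pickle stores the instance's data plus a reference to the class by module path, so the class definition must also be importable wherever you call pickle.load:
import pickle
import pandas as pd

# Hypothetical training data; 'age' and 'income' are placeholder column names.
X_train = pd.DataFrame({'age': [25, 32, 47], 'income': [40000, 55000, 72000]})

# Fit and pickle an *instance*, not the class object itself.
scaler = MyScaler(columns=['age', 'income'])
scaler.fit(X_train)
with open('Custom_Scaler', 'wb') as file:
    pickle.dump(scaler, file)

# When unpickling, pickle looks the class up by the module path it recorded
# (here '__main__.MyScaler'), so that class must be importable in the
# loading process under the same path.
with open('Custom_Scaler', 'rb') as file:
    loaded_scaler = pickle.load(file)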

Related

error in loading a custom class in sklearn pipeline using joblib

I am trying to deploy an sklearn pipeline using FastAPI,
so first I saved my pipeline in a joblib file.
My pipeline looks like this:
pipe = Pipeline([('encoder', MultiColumnLabelEncoder(columns=['sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane'])),
                 ('scaler', StandardScaler()),
                 ('model', lgb_model.best_estimator_)])
and my custom label encoder class looks like this:
class MultiColumnLabelEncoder:
    def __init__(self, columns=None):
        self.columns = columns  # array of column names to encode

    def fit(self, X, y=None):
        return self  # not relevant here

    def transform(self, X):
        '''
        Transforms columns of X specified in self.columns using
        LabelEncoder(). If no columns specified, transforms all
        columns in X.
        '''
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            for colname, col in output.iteritems():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self, X, y=None):
        return self.fit(X, y).transform(X)
The columns attribute in the instantiation is for specifying the categorical variables. I then tried loading the model in my web app using the code below:
import models.ml.classifier as clf
from fastapi import FastAPI
from joblib import load
from models.data import data_input
import pandas as pd
from utils import MultiColumnLabelEncoder

app = FastAPI(title="deployment",
              description="API for machine learning project",
              version="1.0")

@app.on_event('startup')
def load_model():
    clf.model = load('models/ml/ckd_model.joblib')
I am getting the error below:
raise AttributeError("Can't get attribute {!r} on {!r}")
AttributeError: Can't get attribute 'MultiColumnLabelEncoder' on <module '__main__' from 'C:\Users\User\anaconda3\Scripts\uvicorn.exe\__main__.py'>
ERROR: Application startup failed. Exiting.
Try importing the "__main__" module from the anaconda home directory.
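Another approach that is often suggested for this situation (a sketch; train.py is a hypothetical name, and utils.py is assumed from the question's own import) is to dump the pipeline with the class imported from that shared module rather than defined in the script that runs as __main__, so joblib records 'utils.MultiColumnLabelEncoder' instead of a '__main__' reference:
# train.py -- sketch of the training side
from joblib import dump
from utils import MultiColumnLabelEncoder  # same import path the FastAPI app uses

# pipe is the Pipeline built in the question
pipe.fit(X_train, y_train)
dump(pipe, 'models/ml/ckd_model.joblib')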

Constructor __init__ is written to take two positional arguments, but when used, reports an error that only 1 is allowed

I have a class and constructor I'm working on, and I added an extra parameter to __init__, and now I get the error TypeError: FeatureDataset() takes 1 positional argument but 2 were given.
I wonder why; it seems to me that it should accept two arguments. It's an incomplete function, but I'd like to get past this constructor argument count error. I have checked several answers and they were either about something specifically different or about indentation, and I have neither of those issues (4 spaces per indentation level).
def FeatureDataset(Dataset):
    def __init__(self, root_dir, file_name):
        # load csv
        self.file_out = pd.read_csv(file_name)
        self.root_dir = root_dir
        self.labels = self.file_out.iloc[1:160, 0].values
        self.features = self.file_out.iloc[1:160, 1:].values
        # Feature Scaling
        sc = StandardScaler()
        label_train = self.labels
        feature_train = self.features  # sc.fit_transform(features)
        # Convert to torch tensors
        self.feature_train = torch.tensor(label_train, dtype=torch.float32)
        self.label_train = torch.tensor(label_train)

file_name = "data.csv"
root_dir = "archive"
feature_set = FeatureDataset(root_dir, file_name)
This defines a function, not a class:
def FeatureDataset(Dataset):
Try:
class FeatureDataset(Dataset):
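For completeness, a minimal sketch of the corrected class, assuming the CSV columns are numeric (this keeps the question's logic; note that the original also builds feature_train from label_train, which looks like a separate copy-paste bug):
import pandas as pd
import torch
from torch.utils.data import Dataset

class FeatureDataset(Dataset):  # 'class', not 'def'
    def __init__(self, root_dir, file_name):
        self.file_out = pd.read_csv(file_name)
        self.root_dir = root_dir
        self.labels = self.file_out.iloc[1:160, 0].values
        self.features = self.file_out.iloc[1:160, 1:].values
        # Presumably the features, not the labels, belong here:
        self.feature_train = torch.tensor(self.features, dtype=torch.float32)
        self.label_train = torch.tensor(self.labels)

feature_set = FeatureDataset("archive", "data.csv")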

How to call the get_data function in this code

I am new to working with Python and I want to run this code:
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

class Preprocess():
    def __init__(self, config, d_name):
        self.root_dir = config.root_dir
        self.batch_size = config.batch_size
        self.cora = Planetoid(root='./data/cora', name='Cora')
        # self.citeseer = Planetoid(root='./data/citeseer', name='CiteSeer')
        # self.pubmed = Planetoid(root='./data/pubmed', name='PubMed')
        self.num_classes, self.num_node_features, self.data = self.get_data(d_name)

    def get_data(self, d_name):
        '''
        d_name = 'Cora', 'CiteSeer', 'PubMed'
        '''
        dataset = Planetoid(root=self.root_dir + d_name, name=d_name)
        return dataset.num_classes, dataset.num_node_features, DataLoader(dataset, batch_size=self.batch_size)

f = Preprocess(config, Cora)
f.get_data(Cora)
but I get this error: name 'config' is not defined
So, it is not a bug.
Look at your constructor:
def __init__(self, config, d_name):
    self.root_dir = config.root_dir
    self.batch_size = config.batch_size
    etc...
And your instantiation:
f = Preprocess(config, Cora)
f.get_data(Cora)
Note that you are passing a variable "config" that has not been declared before. Also, given how the arg "config" is used in the constructor, you need to pass an object as the first parameter in the instantiation "f = Preprocess(args)", because the arg "config" needs to have a "root_dir" attribute.
Please check the PyTorch documentation for more examples of how to use this framework. Don't give up, you can do it.
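For example (a sketch; types.SimpleNamespace is just one convenient way to build an object with the two attributes the constructor reads, and the values shown are placeholders):
from types import SimpleNamespace

# A minimal config object carrying the attributes the constructor reads.
config = SimpleNamespace(root_dir='./data/', batch_size=32)

f = Preprocess(config, 'Cora')  # d_name must be the string 'Cora', not a bare name
f.get_data('Cora')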

Error while unpickling a custom pipeline ML model in Python

I created a custom Pipeline in Python. I have used the sklearn pipeline and it seems to run successfully.
But when I save the model as a pickle file and try to load that saved pickle file in a different notebook, it shows an error.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import eli5
from eli5.sklearn import PermutationImportance
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import ColumnTransformer
path = 'C:/Users/Desktop/'
df = pd.read_excel(path + "df.xlsx", sheet_name='df')
###################################################################################
# import the BaseEstimator
from sklearn.base import BaseEstimator

# define the class OutletTypeEncoder
# custom transformer must have methods fit and transform
class OutletTypeEncoder(BaseEstimator):
    def __init__(self):
        pass
        # self.name = name

    def fit(self, df, y=None):
        return self

    def transform(self, df):
        # replace NaN
        df[['pdf_tbl_pn_identifier', 'pdf_tbl_qty_identifier', 'pdf_header_present']] = df[['pdf_tbl_pn_identifier', 'pdf_tbl_qty_identifier', 'pdf_header_present']].fillna(value=-999)
        df[['pdf_tbl_cnt']] = df[['pdf_tbl_cnt']].fillna(value=0)
        # Replace gt 1 count as 0
        df['pdf_tbl_cnt'] = np.where((df['pdf_tbl_cnt'] == '1'), 1, 0)
        df['part_cnt'] = np.where((df['part_cnt'] == '1'), 1, 0)
        # create numeric and categorical columns
        obj_df = df[['pdf_tbl_pn_identifier', 'pdf_tbl_qty_identifier', 'pdf_header_present',
                     'pdf_body_pn_identifier', 'pdf_body_qty_identifier', 'pdf_model_rel_returned', 'pdf_model_ent_returned']]
        num_df = df[['pdf_tbl_cnt', 'pdf_model_avg_relationship_score', 'pdf_model_avg_entity_score', 'part_cnt']]
        # Label encoding for the categorical columns, then recombine
        obj_df = obj_df.apply(LabelEncoder().fit_transform)
        df = pd.concat([obj_df, num_df], axis=1)
        # df.reset_index(inplace=True, drop=True)
        df.pdf_tbl_pn_identifier = df.pdf_tbl_pn_identifier.astype(str)
        df.pdf_tbl_qty_identifier = df.pdf_tbl_qty_identifier.astype(str)
        df.pdf_body_pn_identifier = df.pdf_body_pn_identifier.astype(str)
        df.pdf_body_qty_identifier = df.pdf_body_qty_identifier.astype(str)
        df.pdf_model_rel_returned = df.pdf_model_rel_returned.astype(str)
        df.pdf_model_ent_returned = df.pdf_model_ent_returned.astype(str)
        df.pdf_header_present = df.pdf_header_present.astype(str)
        # df.matching = df.matching.astype(str)
        # df['pdf_tbl_cnt'] = df['pdf_tbl_cnt'].apply(np.int64)
        df.pdf_tbl_cnt = df.pdf_tbl_cnt.apply(np.int64)
        return df
#################################################################################
feature_cols = df.drop(['matching'], axis=1)
X = feature_cols  # Features
y = df.matching  # Target variable

# split into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Create Pipeline
logreg = LogisticRegression()
model_pipeline = Pipeline(steps=[('preprocess', OutletTypeEncoder()),
                                 ('logreg', LogisticRegression())
                                 ])

# fit the pipeline with the training data
model_pipeline.fit(X_train, y_train)

# Predict
y_pred = model_pipeline.predict(X_test)
Now I save the model as a pickle file and want to use that pickle file in another notebook.
But I got an error:
AttributeError: Can't get attribute 'OutletTypeEncoder' on <module '__main__'>
# Save the Model to a file in the current working directory
Pkl_Filename = "C:\\Users\\SafayetKarim\\Desktop\\confidence_score\\results_updated\\pdf\\logisic_Model_pipeline.pkl"
with open(Pkl_Filename, 'wb') as file:
    pickle.dump(model_pipeline, file)

# Load the Model back from file
with open('C:\\Users\\SafayetKarim\\Desktop\\confidence_score\\results_updated\\pdf\\logisic_Model_pipeline.pkl', 'rb') as file:
    logisic_Model_pipeline = pickle.load(file)

logisic_Model_pipeline
Please help me out how to resolve the issue.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-4-28376e81d621> in <module>
1 # Load the Model back from file
2 with open('C:\\Users\\SafayetKarim\\Desktop\\confidence_score\\results_updated\\pdf\\OutletTypeEncoder.pkl', 'rb') as file:
----> 3 OutletTypeEncoder = pickle.load(file)
4
5 OutletTypeEncoder
AttributeError: Can't get attribute 'OutletTypeEncoder' on <module '__main__'>
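A common resolution for this error (a sketch; the module name encoders.py is illustrative) is to move the class definition into a standalone .py file and import it in both notebooks, so pickle records an importable module path instead of '__main__':
# encoders.py -- hypothetical module holding the custom transformer
from sklearn.base import BaseEstimator

class OutletTypeEncoder(BaseEstimator):
    ...  # body exactly as defined in the question

# In the training notebook:
# from encoders import OutletTypeEncoder  # pickle records 'encoders.OutletTypeEncoder'
# ... build, fit and pickle.dump(model_pipeline, file) as above

# In the scoring notebook:
# from encoders import OutletTypeEncoder  # must be importable before pickle.load
# logisic_Model_pipeline = pickle.load(file)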

Cannot get a prediction from Google AI Platform with PyTorch

I've deployed a custom PyTorch model to Google AI Platform for prediction, but when I try to make a prediction request with image data using gcloud tools, I get the following error in response:
{
    "error": "Prediction failed: unknown error."
}
I've tried encoding my image data in b64 format and placing it into a multidimensional Python array, by doing the following:
pil_im = Image.open('Pic512.png')
pil_im = pil_im.resize((224, 224)).convert('RGB')
im_arr = np.asarray(pil_im)
py_arr = im_arr.tolist()
json_instance_1 = {'instances': py_arr}
with open('json_instance_1.json', 'w') as f:
    json.dump(json_instance_1, f)
I converted it into b64 like so, after adjusting my Predictor code accordingly:
with open('Pic512.png', 'rb') as f:
    byte_im = f.read()
json_instance = {'instances': {'b64': base64.b64encode(byte_im).decode()}}
with open('json_instance.json', 'w') as f:
    json.dump(json_instance, f)
I've tried converting with different file formats and similar methods, but all of them give me the same error.
My predictor module:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
import torch
from torchvision import transforms
from torch.nn import functional as F
from PIL import Image
# from sklearn.externals import joblib
import numpy as np
import os
import io
import base64

class MyPredictor(object):
    """An example Predictor for an AI Platform custom prediction routine."""

    def __init__(self, model, preprocessor, device):
        """Stores artifacts for prediction. Only initialized via `from_path`."""
        self._resnet = model
        self._mtcnn_mult = preprocessor
        self._device = device
        self.get_std_tensor = transforms.Compose([
            np.float32,
            np.uint8,
            transforms.ToTensor(),
        ])
        self.tensor2pil = transforms.ToPILImage(mode='RGB')
        self.trans_resnet = transforms.Compose([
            transforms.Resize((100, 100)),
            np.float32,
            transforms.ToTensor()
        ])

    def predict(self, instances, **kwargs):
        pil_transform = transforms.Resize((512, 512))
        imarr = np.uint8(np.array(instances))
        # img_bytes_string = io.BytesIO(base64.b64decode(instances))
        pil_im = Image.fromarray(imarr)
        # pil_im = Image.open(img_bytes_string)
        image = pil_im.convert('RGB')
        pil_im_512 = pil_transform(image)
        boxes, _ = self._mtcnn_mult.detect(pil_im_512)
        box = boxes[0]
        face_tensor = extract_face(pil_im_512, box, margin=40)
        std_tensor = self.get_std_tensor(face_tensor.permute(1, 2, 0))
        cropped_pil_im = self.tensor2pil(std_tensor)
        face_tensor = self.trans_resnet(cropped_pil_im)
        face_tensor4d = face_tensor.unsqueeze(0)
        face_tensor4d = face_tensor4d.to(self._device)
        self._resnet.eval()
        prediction = self._resnet(face_tensor4d)
        preds = F.softmax(prediction, dim=1).detach().numpy().reshape(-1)
        print('probability of (class1, class2) = ({:.4f}, {:.4f})'.format(preds[0], preds[1]))
        return {'probs': preds.tolist()}

    @classmethod
    def from_path(cls, model_dir):
        device_path = os.path.join(model_dir, 'device_cpu.pt')
        device = torch.load(device_path)
        model_path = os.path.join(model_dir, 'FullResNetRefinedExtra_no_norm_100x100_8634.pt')
        classifier = torch.load(model_path, map_location=device)
        mtcnn_path = os.path.join(model_dir, 'mtcnn_mult.pt')
        mtcnn_mult = torch.load(mtcnn_path)
        return cls(classifier, mtcnn_mult, device)
When I test the class locally everything works, so I assume it's a problem related to the serialisation and deserialisation on the Google Platform side. How can I resolve this issue?
