I am receiving this error when I try to run the code (from CMD):
ModuleNotFoundError: No module named 'numbers.hog'; numbers is not a package
Here is the hog.py file code...
from skimage import feature

class HOG:
    def __init__(self, orientations = 9, pixelsPerCell = (8, 8),
                 cellsPerBlock = (3, 3), normalize = False):
        self.orienations = orientations
        self.pixelsPerCell = pixelsPerCell
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize

    def describe(self, image):
        hist = feature.hog(image,
                           orientations = self.orienations,
                           pixels_per_cell = self.pixelsPerCell,
                           cells_per_block = self.cellsPerBlock,
                           normalize = self.normalize)
        return hist
...and the main file (train.py), which raises the error:
from sklearn.svm import LinearSVC
from numbers.hog import HOG
from numbers import dataset
import argparse
import pickle as cPickle

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required = True,
    help = "path to the dataset file")
ap.add_argument("-m", "--model", required = True,
    help = "path to where the model will be stored")
args = vars(ap.parse_args())

(digits, target) = dataset.load_digits(args["dataset"])
data = []

hog = HOG(orientations = 18, pixelsPerCell = (10, 10),
    cellsPerBlock = (1, 1), normalize = True)

for image in digits:
    image = dataset.deskew(image, 20)
    image = dataset.center_extent(image, (20, 20))
    hist = hog.describe(image)
    data.append(hist)

model = LinearSVC(random_state = 42)
model.fit(data, target)

f = open(args["model"], "w")
f.write(cPickle.dumps(model))
f.close()
I don't understand why it gives me an error about the module/package. numbers is a package, so why doesn't it import it (as it seems it should)?
UPDATE: I tried to put from .hog import HOG and then execute it from CMD. It prints:
No module named '__main__.hog'; '__main__' is not a package
Isn't that strange? hog.py is in the main package together with the other files. As you can see, it also contains the HOG class... I can't understand it. Can someone reproduce the error?
In the IDE console it prints:
usage: train.py [-h] -d DATASET -m MODEL
train.py: error: the following arguments are required: -d/--dataset, -m/--model
This is expected when it is executed in the IDE, because the program MUST be run from CMD (where the required arguments are passed).
UPDATE 2: for anyone interested, this is the project: https://github.com/VAUTPL/Number_Detection
Change from numbers.hog import HOG to from hog import HOG and change from numbers import dataset to import dataset.
You are already in the "numbers" package, so you don't have to specify it again when you import.
When you type from numbers import dataset, Python looks for a package named numbers (inside the current package) that contains a dataset.py file.
If your train.py were outside the numbers package, then you would have to put the package name (numbers) in front.
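For reference, a minimal sketch of what the corrected imports in train.py could look like, assuming train.py sits inside the numbers package next to hog.py and dataset.py:
# train.py (inside the "numbers" package) -- sketch of the corrected imports
from hog import HOG      # hog.py is in the same folder
import dataset           # dataset.py is in the same folder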
Important
numbers is a Python standard library module:
https://docs.python.org/2/library/numbers.html
Check that you are not actually importing that module instead, or rename your package to a more specific name.
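A quick way to see which numbers Python actually resolves (a sketch; the printed path tells you whether it is the standard library module or your own package):
import numbers
print(numbers.__file__)   # standard library path vs. your project's numbers package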
Also:
It might be that Python doesn't recognize your package.
Open a Python shell and write:
import sys
print(sys.path)
Check whether your numbers path is there.
If it's not there, you have to add it:
sys.path.insert(0, "/path/to/your/package_or_module")
Your train.py file is already in the package "numbers", so you don't have to import numbers.
Try this instead:
from hog import HOG
I saw in a comment that it gives you an "error (red line)".
Can you be more precise? I don't see any errors there.
Related
I wrote a module for my dataset classes, located in the same directory as my main file.
When I import it and try to instantiate a class, I get a "np is not defined" error even though numpy is imported correctly in both the main file and the module.
I say "correctly" because if I call another numpy function from the main file, or run the module on its own, no error is raised.
This is the code in the main file:
import torch
import numpy as np
from myDatasets import SFCDataset
trainDs = SFCDataset(paths["train"])
and this is the module:
import torch
import numpy as np
from torch.utils.data import Dataset

# Single Fragment Classification Dataset
class SFCDataset(Dataset):
    def __init__(self, path, transform=None, norms=False, areas=False, **kwargs):
        super(SFCDataset, self).__init__()
        self.data = torch.load(path)
        self.fragments = []
        self.labels = []
        for item in self.data:
            self.fragments.append(item[0])
            self.labels.append(item[1])
        self.fragments = np.array(self.fragments)
        self.labels = np.array(self.labels)

        if norms:
            if areas:
                self.fragments = np.transpose(self.fragments[:], (0, 2, 1, 3)).reshape(-1, 1024, 9)[:, :, :7]
            else:
                self.fragments = np.transpose(self.fragments[:], (0, 2, 1, 3)).reshape(-1, 1024, 9)[:, :, :6]
        else:
            if areas:
                self.fragments = np.transpose(self.fragments[:], (0, 2, 1, 3)).reshape(-1, 1024, 9)[:, :, [0, 1, 2, 6]]
            else:
                self.fragments = np.transpose(self.fragments[:], (0, 2, 1, 3)).reshape(-1, 1024, 9)[:, :, :3]

        self.transform = transform

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, index):
        label = self.labels[index]
        pointdata = self.fragments[index]
        if self.transform:
            pointdata = self.transform(pointdata)
        return pointdata, label

if __name__ == "__main__":
    print("huh")
    path = "C:\\Users\\ale23\\OneDrive\\Desktop\\Università\\Tesi\\data\\dataset_1024_AB\\train_dataset_AED_norm_area.pt"
    SFCDataset(path)
I don't know what else to try.
I'm on VSCode, using a Python 3.9.13 virtual environment.
Edit:
This is the error I'm getting:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[75], line 5
2 import numpy as np
3 from myDatasets import SFCDataset
----> 5 trainDs = SFCDataset(paths["train"])
File c:\Users\ale23\OneDrive\Desktop\Università\Dottorato\EquivariantNN\myDatasets.py:17, in SFCDataset.__init__(self, path, transform, norms, areas, **kwargs)
15 self.fragments.append(item[0])
16 self.labels.append(item[1])
---> 17 self.fragments=np.array(self.fragments)
18 self.labels=np.array(self.labels)
20 if norms:
NameError: name 'np' is not defined
Edit 2: I edited some things like path names and parts occurring after the error to try to lighten up the code; sorry, I should have uploaded all the code as it is run now.
Edit 3: I was trying to reproduce the error in some other way and built a dummy module. I tried to import the class into that other module, ran dummy.py, and it works. It appears to be a problem with the fact that I'm working in a notebook; is that possible?
the dummy module:
import numpy as np

def test():
    print(np.array(1))
    print(np.array(2))
    print(np.sum(np.random.rand(2)))

from myDatasets import SFCDataset
trainDs = SFCDataset("C:\\Users\\ale23\\OneDrive\\Desktop\\Università\\Tesi\\data\\dataset_1024_AB\\train_dataset_AED_norm_area.pt")
This runs by calling "python testmodule.py" in the console.
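One possible explanation, though only an assumption: the notebook kernel may have cached an older copy of myDatasets from before numpy was imported there, so re-running the import cell kept using the stale module. A minimal sketch of forcing a re-read without restarting the kernel:
# Sketch: make the notebook re-read myDatasets instead of reusing the cached copy
import importlib
import myDatasets

importlib.reload(myDatasets)
from myDatasets import SFCDataset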
Edit 4:
Today I restarted my PC and ran the same code as yesterday, and the notebook works. Yesterday I tried closing VSCode and restarting it, but that did not help.
Maybe something is wrong with the virtual environment? I honestly don't know where to look.
Anyway, the program now runs with no errors; should I close this?
Thank you all for your time and help.
While using the rpy2 library of Python to work with R, I get the following error message when trying to import a function from the bnlearn package:
# Using R inside python
import rpy2
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
from rpy2.robjects.packages import importr
utils = rpackages.importr('utils')
utils.chooseCRANmirror(ind=1)
# Install packages
packnames = ('visNetwork', 'bnlearn')
utils.install_packages(StrVector(packnames))
# Load packages
visNetwork = importr('visNetwork')
bnlearn = importr('bnlearn')
tabu = bnlearn.tabu
fit = bnlearn.bn.fit
With the error:
AttributeError: module 'bnlearn' has no attribute 'bn'
Checking the bnlearn documentation, one finds that bn is a class structure. So one should check all the attributes of the object in question, that is, run:
bnlearn.__dict__['_rpy2r']
After that you should get output similar to the following, which shows how each attribute of bnlearn can be imported:
...
...
'bn_boot': 'bn.boot',
'bn_cv': 'bn.cv',
'bn_cv_algorithm': 'bn.cv.algorithm',
'bn_cv_structure': 'bn.cv.structure',
'bn_fit': 'bn.fit',
'bn_fit_backend': 'bn.fit.backend',
'bn_fit_backend_continuous': 'bn.fit.backend.continuous',
'bn_fit_backend_discrete': 'bn.fit.backend.discrete',
'bn_fit_backend_mixedcg': 'bn.fit.backend.mixedcg',
'bn_fit_barchart': 'bn.fit.barchart',
'bn_fit_dotplot': 'bn.fit.dotplot',
...
...
Then, running the following will solve the issue:
bn_fit = bnlearn.bn_fit
Now you could, for example, fit a Bayesian network:
structure = tabu(datos, score = "loglik-g")
bn_mod = bn_fit(structure, data = datos, method = "mle")
In general, this approach solves the issue of importing any function from an R package into Python through the rpy2 package.
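A minimal sketch of that general rule, assuming the package is already installed: importr() exposes R names with dots replaced by underscores, and the translation table is available on the imported module.
from rpy2.robjects.packages import importr

bnlearn = importr('bnlearn')
tabu = bnlearn.tabu        # R function: tabu
bn_fit = bnlearn.bn_fit    # R function: bn.fit (dot becomes underscore)

# Inspect the Python-name -> R-name mapping if an attribute seems missing:
print(bnlearn.__dict__['_rpy2r'])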
I want to create an endpoint for scikit-learn logistic regression in AWS SageMaker. I have a train.py file which contains the training code for scikit-learn on SageMaker.
import subprocess as sb
import pandas as pd
import numpy as np
import pickle, json
import sys

def install(package):
    sb.call([sys.executable, "-m", "pip", "install", package])

install('s3fs')

import argparse
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument('--solver', type=str, default='liblinear')

    # Data, model, and output directories
    parser.add_argument('--output_data_dir', type=str, default=os.environ.get('SM_OUTPUT_DIR'))
    parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))

    args, _ = parser.parse_known_args()

    # ... load from args.train and args.test, train a model, write model to args.model_dir.
    input_files = [os.path.join(args.train, file) for file in os.listdir(args.train)]
    if len(input_files) == 0:
        raise ValueError(('There are no files in {}.\n' +
                          'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                          'the data specification in S3 was incorrectly specified or the role specified\n' +
                          'does not have permission to access the data.').format(args.train, "train"))

    raw_data = [pd.read_csv(file, header=None, engine="python") for file in input_files]
    df = pd.concat(raw_data)

    y = df.iloc[:, 0]
    X = df.iloc[:, 1:]

    solver = args.solver

    from sklearn.linear_model import LogisticRegression
    lr = LogisticRegression(solver=solver).fit(X, y)

from sklearn.externals import joblib

def model_fn(model_dir):
    lr = joblib.dump(lr, "model.joblib")
    return lr
In my SageMaker notebook I ran the following code:
import os
import boto3
import re
import copy
import time
from time import gmtime, strftime
from sagemaker import get_execution_role
import sagemaker
role = get_execution_role()
region = boto3.Session().region_name
bucket=<bucket> # Replace with your s3 bucket name
prefix = <prefix>
output_path = 's3://{}/{}/{}'.format(bucket, prefix,'output_data_dir')
train_data = 's3://{}/{}/{}'.format(bucket, prefix, 'train')
train_channel = sagemaker.session.s3_input(train_data, content_type='text/csv')
from sagemaker.sklearn.estimator import SKLearn
sklearn = SKLearn(
    entry_point='train.py',
    train_instance_type="ml.m4.xlarge",
    role=role,
    output_path=output_path,
    sagemaker_session=sagemaker.Session(),
    hyperparameters={'solver': 'liblinear'})
I'm fitting my model here
sklearn.fit({'train': train_channel})
Now, for creating endpoint,
from sagemaker.predictor import csv_serializer
predictor = sklearn.deploy(1, 'ml.m4.xlarge')
While trying to create the endpoint, it throws:
ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Could not find model data at s3://<bucket>/<prefix>/output_data_dir/sagemaker-scikit-learn-x-y-z-000/output/model.tar.gz.
I checked my S3 bucket. Inside my output_data_dir there is a sagemaker-scikit-learn-x-y-z-000 dir which has a debug-output\training_job_end.ts file. An additional directory got created outside my <prefix> folder with the name sagemaker-scikit-learn-x-y-z-000, containing a source\sourcedir.tar.gz file. Generally, whenever I trained my models with SageMaker built-in algorithms, files like output_data_dir\sagemaker-scikit-learn-x-y-z-000\output\model.tar.gz were created. Can someone please tell me where my scikit model got stored, how to have source\sourcedir.tar.gz placed under my prefix without doing it manually, and how to see the contents of sourcedir.tar.gz?
Edit: I elaborated the question regarding the prefix. Whenever I run sklearn.fit(), two objects with the same name sagemaker-scikit-learn-x-y-z-000 are created in my S3 bucket. One is created at <bucket>/<prefix>/output_data_dir/sagemaker-scikit-learn-x-y-z-000/debug-output/training_job_end.ts and the other at <bucket>/sagemaker-scikit-learn-x-y-z-000/source/sourcedir.tar.gz. Why is the second one not created inside my <prefix> like the first one? What is contained in the sourcedir.tar.gz file?
I am not sure your model is really being stored if you can't find it in S3. While you put the joblib.dump call inside a function in your entry point script, I have the call at the end of the main block. For example:
# persist model
path = os.path.join(args.model_dir, "model.joblib")
joblib.dump(myestimator, path)
print('model persisted at ' + path)
Then the file can be found in ..\output\model.tar.gz, just as in your other cases. To double-check that it was created, you may want to add a print statement whose output you can find in the training job's log.
You must dump the model as the last step of your training code. Currently you are doing it in the wrong place, as model_fn's goal is to load the model for inference, not for training.
Add the dump after training:
lr = LogisticRegression(solver=solver).fit(X, y)
joblib.dump(lr, os.path.join(args.model_dir, "model.joblib"))
Change model_fn() to load the model instead of dumping it.
See more here.
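For illustration, a minimal sketch of a model_fn that loads instead of dumps, assuming the training step saved the model as model.joblib inside model_dir:
import os
import joblib

def model_fn(model_dir):
    # Load the model that training wrote to SM_MODEL_DIR (sketch)
    return joblib.load(os.path.join(model_dir, "model.joblib"))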
This post here explains it well:
https://towardsdatascience.com/deploying-a-pre-trained-sklearn-model-on-amazon-sagemaker-826a2b5ac0b6
In short, the tar.gz gets created by tar-gz-ing the model.joblib binary, which was first created by joblib.dump. To quote the article:
#Build tar file with model data + inference code
bashCommand = "tar -cvpzf model.tar.gz model.joblib inference.py"
process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
The inference.py is probably optional.
I am trying to convert dot file into a png or jpeg file where I can view the Random Forest Tree. I am following this tutorial: https://towardsdatascience.com/how-to-visualize-a-decision-tree-from-a-random-forest-in-python-using-scikit-learn-38ad2d75f21c.
I am getting the error FileNotFoundError: [WinError 2] The system cannot find the file specified.
I can see that tree.dot is there and I am able to open it. I'm trying to figure out why it is not reading it. Thanks.
from sklearn.datasets import load_iris
iris = load_iris()
# Model (can also use single decision tree)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=10)
# Train
model.fit(iris.data, iris.target)
# Extract single tree
estimator = model.estimators_[5]
from sklearn.tree import export_graphviz
# Export as dot file
export_graphviz(estimator, out_file='tree.dot',
                feature_names = iris.feature_names,
                class_names = iris.target_names,
                rounded = True, proportion = False,
                precision = 2, filled = True)
<<error occurs here>>
# Convert to png using system command (requires Graphviz)
from subprocess import call
call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])
# Display in jupyter notebook
from IPython.display import Image
Image(filename = 'tree.png')
I ran it through Docker with an Ubuntu image and added RUN apt-get install graphviz -y in the Dockerfile. It started to work. Then I used dot -Tpng tree.dot -o tree.png.
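In the same spirit, a small sketch for checking up front that the Graphviz dot executable is reachable before calling it (the WinError 2 typically means the dot binary, not tree.dot, could not be found):
import shutil
from subprocess import call

# Fail early with a clear message if Graphviz's dot is not on PATH
if shutil.which("dot") is None:
    raise RuntimeError("Graphviz 'dot' executable not found on PATH - install Graphviz first")

call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])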
I have come across a strange error when using the dlib.shape_predictor function.
This is my code:
import sys
import dlib
from skimage import io
import matplotlib.pyplot as plt
import os
predictor_model = os.path.join(os.path.dirname(__file__),"Models","shape_predictor_68_face_landmarks.dat")
# Take the image file name from the command line
file_name = "/home/matt/Programming/Python/Face_detection/Images/wedding_photo.jpg"
# Test if they are equivalent
print predictor_model == "/home/matt/Programming/Python/Face_detection/Models/shape_predictor_68_face_landmarks.dat"
predictor_model = "/home/matt/Programming/Python/Face_detection/Models/shape_predictor_68_face_landmarks.dat"
# Create a HOG face detector using the built-in dlib class
face_detector = dlib.get_frontal_face_detector()
face_pose_predictor = dlib.shape_predictor(predictor_model)
If I use the predictor model as defined through the relative path using os, then I get an error:
ArgumentError: Python argument types in
shape_predictor.__init__(shape_predictor, unicode)
did not match C++ signature:
__init__(boost::python::api::object, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)
__init__(_object*)
If I hardcode the path as defined above, the script runs without a problem. Any ideas what may have gone wrong?