ModuleNotFoundError When loading a pickled file in tornado

ModuleNotFoundError When loading a pickled file in tornado - python

I am using tornado framework for loading my machine learning model. I have a popularity class
import numpy as np
import pandas as pd
from pandas import DataFrame
class Popularity():
users_data = pd.read_csv('~/Desktop/LatentCollaborativeFiltering/lib/seed_data/ratings.csv')
movies_data = pd.read_csv('~/Desktop/LatentCollaborativeFiltering/lib/seed_data/movies.csv')
data = pd.merge(users_data, movies_data, left_on="movieId", right_on="movieId")
data = pd.DataFrame.sort_values(data, ['userId','movieId'],ascending=[0,1])
def __init__(self):
pass
def favoriteMovies(self, activeUser,N):
topMovies=pd.DataFrame.sort_values(self.data[self.data.userId==activeUser],['rating'],ascending=[0])[:N]
# return the title corresponding to the movies in topMovies
return list(topMovies.title)
def recommend_movies(self):
return "No recommendation"
Now i have another file to pickle an object of this class build_model.py
from __future__ import print_function
import os
from sklearn.externals import joblib
import pandas as pd
import numpy as np
from popularity import Popularity
if __name__ == "__main__":
popu = Popularity()
_CUR_DIR = os.path.dirname(os.path.realpath(__file__))
_SERIALIZATION_DIR = os.path.join(_CUR_DIR)
if not os.path.exists(_SERIALIZATION_DIR):
os.makedirs(_SERIALIZATION_DIR)
model_filename = os.path.join(_SERIALIZATION_DIR, "model.pkl")
joblib.dump(popu, model_filename)
print("Successfully Built and Picked into models folder")
This now builds the model and successfully saves the model in same directory as model.pkl file. But when I load the model in the torando it gives me following error
[I 180702 06:30:44 server:40] Loading Latent Collaborative Filtering model...
Traceback (most recent call last):
File "run.py", line 7, in <module>
server.main()
File "/home/rabin/Desktop/LatentCollaborativeFiltering/movies-api/app/server.py", line 45, in main
MODELS["recommender"] = pickle.load(infile)
ModuleNotFoundError: No module named 'Popularity'
My server.py file is
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import logging.config
import tornado.ioloop
import tornado.web
from tornado.options import options
from sklearn.externals import joblib
from app.settings import MODEL_DIR, ROOT_DIR, _CUR_DIR
from app.handler import IndexHandler, IrisPredictionHandler
from app.popularity import Popularity
import pickle
MODELS = {}
def load_model(pickle_filename):
return joblib.load(pickle_filename)
def main():
# Get the Port and Debug mode from command line options or default in settings.py
options.parse_command_line()
# create logger for app
logger = logging.getLogger('app')
logger.setLevel(logging.INFO)
FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=FORMAT)
# Load ML Models
logger.info("Loading Latent Collaborative Filtering model...")
#MODELS["recommender"] = load_model(os.path.join(_CUR_DIR, 'model.pkl'))
#MODELS["recommender"] = load_model('model.pkl')
with open(os.path.join(_CUR_DIR, 'model.pkl'), 'rb') as infile:
MODELS["recommender"] = pickle.load(infile)
urls = [
(r"/$", IndexHandler),
(r"/api/recommender/(?P<action>[a-zA-Z]+)?", RecommenderHandler, # action is function in handler
dict(model=MODELS["recommender"]))
]
# Create Tornado application
application = tornado.web.Application(
urls,
debug=options.debug,
autoreload=options.debug)
# Start Server
logger.info("Starting App on Port: {} with Debug Mode: {}".format(options.port, options.debug))
application.listen(options.port)
tornado.ioloop.IOLoop.current().start()
And my handler.py file is
"""
Request Handlers
"""
import tornado.web
from tornado import concurrent
from tornado import gen
from concurrent.futures import ThreadPoolExecutor
from app.base_handler import BaseApiHandler
from app.settings import MAX_MODEL_THREAD_POOL
from app.popularity import Popularity
class IndexHandler(tornado.web.RequestHandler):
"""APP is live"""
def get(self):
self.write("Movie Recommendation System is Live")
def head(self):
self.finish()
class RecommenderHandler(BaseApiHandler):
_thread_pool = ThreadPoolExecutor(max_workers=MAX_MODEL_THREAD_POOL)
def initialize(self, model, *args, **kwargs):
self.model = model
super().initialize(*args, **kwargs)
#concurrent.run_on_executor(executor='_thread_pool')
def _blocking_predict(self, X):
target_values = self.model.favoriteMovies(5,10)
return target_values
#gen.coroutine
def predict(self, data):
if type(data) == dict:
data = [data]
X = []
for item in data:
record = (item.get("user_id"))
X.append(record)
results = yield self._blocking_predict(X)
self.respond(results)
I have searched too much for the solution but has not found yet that worked for me.
I cannot load from the console too

Related

Python blockchain. NameError: name 'Chain' is not defined

Today I was building a blockchain in python off of this tutorial, since I'm interested in cryptocurrency. I was ready to launch it, when I got this error.
Traceback (most recent call last):
File "blockchain.py", line 9, in <module>
class Chain(object):
File "blockchain.py", line 17, in Chain
blockchain = Chain()
NameError: name 'Chain' is not defined
I don't really know why this is happening, to me there doesn't really seem to be a problem. Here is part of the code where it is having the error:
import hashlib
import json
from textwrap import dedent
from time import time
from hashlib import sha256
from uuid import uuid4
from flask import Flask, jsonify, request
class Chain(object):
def __init__(self):
self.chain = []
self.current_transactions = []
self.new_block(previous_hash=1, proof=100)
app = Flask(__name__)
node_indentifier = str(uuid4()).replace('-', '')
blockchain = Chain()
If you need more of the code then I don't mind giving more.
Thanks in advance!

It is simple as indentation.
You wrote
chain = Chain()
In the class. The correct code is just moving the last lines back.
import hashlib
import json
from textwrap import dedent
from time import time
from hashlib import sha256
from uuid import uuid4
from flask import Flask, jsonify, request
class Chain(object):
def __init__(self):
self.chain = []
self.current_transactions = []
self.new_block(previous_hash=1, proof=100)
app = Flask(__name__)
node_indentifier = str(uuid4()).replace('-', '')
blockchain = Chain()

IIUC you are trying to call the blockchain = Chail() outside the class Chain. Your indentation is wrong. This should work
class Chain(object):
def __init__(self):
self.chain = []
self.current_transactions = []
self.new_block(previous_hash=1, proof=100)
app = Flask(__name__)
node_indentifier = str(uuid4()).replace('-', '')
blockchain = Chain()

How to trigger a function without importing everything?

I programmed a gateway to a opcua-server with python-opcua.
The gateway is subscribing some values in the opcua. That is working good and fast.
Now I want to call a script that writes to the opcua.
In principle, it works too. But because I have to import the whole gateway(and all opcua stuff), it is very slow...
My Question: Is is possible to trigger a function in my class-instance without imorting everything?
To start e.g. function setBool(), I have to import Gateway...
#!/usr/bin/env python3.5 -u
# -*- coding: utf-8 -*-
import time
import sys
import logging
from logging.handlers import RotatingFileHandler
from threading import Thread
from opcua import Client
from opcua import ua
from subscribeOpcua import SubscribeOpcua
from cmdHandling import CmdHandling
from keepConnected import KeepConnected
class Gateway(object):
def __init__(self):
OPCUA_IP = '1.25.222.222'
OPCUA_PORT = '4840'
OPCUA_URL = "opc.tcp://{}:{}".format(OPCUA_IP, str(OPCUA_PORT))
addr = "OPCUA-URL:{}.".format(OPCUA_URL)
# Setting up opcua-handler
self.client = Client(OPCUA_URL)
self.opcuaHandlers = [SubscribeOpcua()]
# Connect to opcua
self.connecter = KeepConnected(self.client,self.opcuaHandlers)
self.connecter.start()
def setBool(self, client):
"""Set e boolean variable on opcua-server.
"""
path = ["0:Objects","2:DeviceSet"...]
root = client.get_root_node()
cmd2opcua = root.get_child(path)
cmd2opcua.set_value(True)
if __name__ == "__main__":
"""Open connecter when gateway is opened directly.
"""
connect = Gateway()

The only way to prevent a code from runing when importing a module is to put it inside a method:
def import_first_part():
global re
global defaultdict
print('import this first part')
# import happen locally
# because when you do `import re` actually
# re = __import__('re')
import re
from collections import defaultdict
def import_second_part():
print('import pandas')
# really unnecessary check here because if we import
# pandas for the second time it will just retrieve the object of module
# the code of module is executed only in the first import in life of application.
if 'pandas' in globals():
return
global pandas
import pandas
def use_regex():
import_first_part()
# do something here
if __name__ == '__main__':
use_regex()
re.search('x', 'xb') # works fine
I checked that 'pandas' is in global scope before reimport it again but really this is not necessary, because when you import a module for the second time it's just retrieved no heavy calculation again.

Unit Tests for Python: Mock Patch

I am trying to write Unit Tests for Cassandra but am not able to get it work. Here is the code:
CassandraLoggingModel.py:
import uuid
from cassandra.cqlengine import columns
from datetime import datetime
from cassandra.cqlengine.models import Model
class CassandraRunLog(Model):
pipeline_id = columns.Text(partition_key=True, max_length=180)
task_id = columns.Text(partition_key=True, max_length=180)
execution_date = columns.DateTime(partition_key=True)
created_at = columns.DateTime(primary_key=True, default=datetime.now())
host = columns.Text(max_length=1000)
run_as_unixname = columns.Text(max_length=1000)
logger = columns.Text(max_length=128)
level = columns.Text(max_length=16)
trace = columns.Text(max_length=10000)
msg = columns.Text(max_length=64000)
CassandraLogging.py
import sys
import logging
import traceback
import uuid
from datetime import datetime
from CassandraLoggingModel import CassandraRunLog
from cassandra.cqlengine import connection
from cassandra.auth import PlainTextAuthProvider
import cassandra
class CassandraHandler(logging.Handler):
def __init__(self, user, *args, **kwargs):
self.user = user
super(CassandraHandler, self).__init__(*args, **kwargs)
def emit(self, record):
print("emit called")
trace = "None"
exc = record.__dict__['exc_info']
if exc:
trace = traceback.format_exc(exc)
if hasattr(record, 'message'):
log_msg = record.message
else:
log_msg = self.format(record)
self.host = 'localhost'
self.keyspace = 'logging'
try:
auth_provider = PlainTextAuthProvider(username='some', password='some')
connection.setup([self.host], self.keyspace, auth_provider=auth_provider)
model = CassandraRunLog(host=self.user, created_at=datetime.now(), trace=trace, msg=log_msg)
model.save()
except Exception as e:
print(str(e))
test.py
import datetime
import logging
import mock
from CassandraLogging import CassandraHandler
#mock.patch('CassandraLoggingModel.CassandraRunLog')
def test_formatting(MockClassRunLog):
run_log = MockClassRunLog.return_value
# construct our logging handler
handler = CassandraHandler('name')
# Log an unformated message.
record = logging.LogRecord(name='pytest',
level=logging.INFO,
pathname='something',
lineno=0,
msg='something',
args=(),
exc_info=None,
func='test_formatting')
handler.emit(record)
# we should have a record added to the DB
run_log.save.assert_called_once_with()
I am trying to add a logging handler in python that stores the log message to a cassandra database. I am trying to test if model's save method is called. save method is implemented in Cassandra Model and CassandraRunLog inherits from that.
When I am running the test using command:
py.test test.py
I am getting the following error:
E AssertionError: Expected to be called once. Called 0 times.
Can someone please help ?

Never mind. I figured it out. The test was not able to connect to the database, so control was getting passed to the except block every time.

How can I use mock for testing inside greenlet?

I use bottle & gevent for my python (2.7.6) application.
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from gevent import spawn, monkey
from bottle import Bottle
from .settings import MONGODB_HOST, MONGODB_PORT, MONGODB_NAME
monkey.patch_all()
mongo_client = MongoClient(MONGODB_HOST, MONGODB_PORT)
db = mongo_client[MONGODB_NAME]
class MyApp(object):
def insert_event(self):
data = {'a': self.a, 'b': self.b} # some data
db.events.insert(data)
def request(self):
# request data processing...
spawn(self.insert_event)
return {}
app = Bottle()
app.route('/', method='POST')(MyApp().request)
And I want to test it with mongomock (https://github.com/vmalloc/mongomock).
from __future__ import unicode_literals
from unittest import TestCase
from webtest import TestApp
from mock import patch
from mongomock import MongoClient
from ..app import app as my_app
db = MongoClient().db
#patch('my_app.app.db', db)
class TestViews(TestCase):
def setUp(self):
self.app = TestApp(ssp_app)
self.db = db
def test_request(self):
response = self.app.post('/', {})
last_event = self.db.events.find_one({})
self.assertTrue(last_event)
My test fails.
FAIL: test_request (my_app.tests.TestViews)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/mock/mock.py", line 1305, in patched
return func(*args, **keywargs)
File "/srv/mysite/my_app/tests/views.py", line 71, in test_request
self.assertTrue(last_event)
AssertionError: None is not true
It is work if I use self.insert_event without spawn. I tried to use patch.object, "with" statement, but without success...

I found solution. I need to mock gevent.spawn method. Because I get HTTP response before the coroutine ends. This my solution:
#patch('my_app.app.db', db)
#patch('my_app.app.spawn',
lambda method, *args, **kwargs: method(*args, **kwargs))
class TestViews(TestCase):

How to test with webtest and multiprocessing in GAE

Please tell me how to solve a following problem.I would like to use webtest with multiprocess but it fails.
It means that I would like to use parallel tests with db.I'm not paticular about multiprocess.
Probably,because of using other process,It can not call db.
result
======================================================================
FAIL: test_answer (testlab.LabTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Library/Python/2.7/site-packages/mock.py", line 1201, in patched
return func(*args, **keywargs)
File "/Users/unko/dropbox/test/testlab.py", line 48, in test_answer
self.assertEqual(u.param,"bar")
AssertionError: 'foo' != 'bar'
----------------------------------------------------------------------
testlab.py
#!-*- coding: utf-8 -*-
import unittest
import webtest
import webapp2
from google.appengine.ext import testbed,ndb
import json
import time
from google.appengine.api import apiproxy_stub_map
from google.appengine.api import urlfetch_stub
from mock import patch, Mock
from google.appengine.ext import db
from lab import Lab
from lab import Unko
import multiprocessing
class LabTestCase(unittest.TestCase):
def setUp(self):
app = webapp2.WSGIApplication([
('/lab', Lab),
('/(.*)', Lab)
],debug=True)
self.testapp = webtest.TestApp(app)
self.testbed = testbed.Testbed()
self.testbed.setup_env(app_id='sagifugoh')
self.testbed.activate()
self.testbed.init_datastore_v3_stub()
self.testbed.init_memcache_stub()
self.testbed.init_channel_stub()
self.testbed.init_urlfetch_stub()
def tearDown(self):
self.testbed.deactivate()
#patch('google.appengine.api.urlfetch.urlfetch_service_pb.URLFetchResponse')
def test_answer(self, URLFetchResponse):
def request(param):
response = self.testapp.post('/lab',{"key":"key","param":param})
def async(param):
p = multiprocessing.Process(target=request,args=[param])
jobs.append(p)
p.start()
jobs = []
u = Unko.get_or_insert("key")
u.param = "foo"
u.put()
async("bar")
time.sleep(2)
self.assertEqual(u.param,"bar")
if __name__ == '__main__':
unittest.main()
lab.py
#!-*- coding: utf-8 -*-
import webapp2
from google.appengine.ext import db
class Unko(db.Model):
param = db.StringProperty()
class Lab(webapp2.RequestHandler):
def post(self):
key = self.request.get('key')
param = self.request.get('param')
u = Unko.get_or_insert(key)
u.param = param
u.put()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

ModuleNotFoundError When loading a pickled file in tornado - python

Related

Python blockchain. NameError: name 'Chain' is not defined

How to trigger a function without importing everything?

Unit Tests for Python: Mock Patch

How can I use mock for testing inside greenlet?

How to test with webtest and multiprocessing in GAE

Categories

Resources