I am trying to write unit tests for Cassandra but am not able to get it to work. Here is the code:
CassandraLoggingModel.py:
import uuid
from cassandra.cqlengine import columns
from datetime import datetime
from cassandra.cqlengine.models import Model
class CassandraRunLog(Model):
    pipeline_id = columns.Text(partition_key=True, max_length=180)
    task_id = columns.Text(partition_key=True, max_length=180)
    execution_date = columns.DateTime(partition_key=True)
    created_at = columns.DateTime(primary_key=True, default=datetime.now())
    host = columns.Text(max_length=1000)
    run_as_unixname = columns.Text(max_length=1000)
    logger = columns.Text(max_length=128)
    level = columns.Text(max_length=16)
    trace = columns.Text(max_length=10000)
    msg = columns.Text(max_length=64000)
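As an aside, separate from the test failure: default=datetime.now() above is evaluated once, when the class body runs, so every row would share that import-time timestamp. cqlengine also accepts a callable for default, which defers the call to row-creation time:

    # passing the callable, not its result, evaluates it per new row
    created_at = columns.DateTime(primary_key=True, default=datetime.now)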
CassandraLogging.py
import sys
import logging
import traceback
import uuid
from datetime import datetime
from CassandraLoggingModel import CassandraRunLog
from cassandra.cqlengine import connection
from cassandra.auth import PlainTextAuthProvider
import cassandra
class CassandraHandler(logging.Handler):
    def __init__(self, user, *args, **kwargs):
        self.user = user
        super(CassandraHandler, self).__init__(*args, **kwargs)

    def emit(self, record):
        print("emit called")
        trace = "None"
        exc = record.exc_info
        if exc:
            trace = ''.join(traceback.format_exception(*exc))
        if hasattr(record, 'message'):
            log_msg = record.message
        else:
            log_msg = self.format(record)
        self.host = 'localhost'
        self.keyspace = 'logging'
        try:
            auth_provider = PlainTextAuthProvider(username='some', password='some')
            connection.setup([self.host], self.keyspace, auth_provider=auth_provider)
            model = CassandraRunLog(host=self.user, created_at=datetime.now(),
                                    trace=trace, msg=log_msg)
            model.save()
        except Exception as e:
            print(str(e))
test.py
import datetime
import logging
import mock
from CassandraLogging import CassandraHandler
@mock.patch('CassandraLoggingModel.CassandraRunLog')
def test_formatting(MockClassRunLog):
    run_log = MockClassRunLog.return_value

    # construct our logging handler
    handler = CassandraHandler('name')

    # Log an unformatted message.
    record = logging.LogRecord(name='pytest',
                               level=logging.INFO,
                               pathname='something',
                               lineno=0,
                               msg='something',
                               args=(),
                               exc_info=None,
                               func='test_formatting')
    handler.emit(record)

    # we should have a record added to the DB
    run_log.save.assert_called_once_with()
I am trying to add a logging handler in Python that stores the log message in a Cassandra database, and I want to test whether the model's save method is called. The save method is implemented in cqlengine's Model base class, and CassandraRunLog inherits it.
When I run the test with the command:
py.test test.py
I get the following error:
E AssertionError: Expected to be called once. Called 0 times.
Can someone please help?
Never mind, I figured it out: the test was not able to connect to the database, so control passed to the except block every time and the model was never saved.
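For reference, one way to keep such a test offline is to patch both the connection setup and the model where CassandraLogging looks them up, so emit never reaches a real cluster. A minimal sketch, assuming the module layout above:

import logging
import mock

@mock.patch('CassandraLogging.connection.setup')   # no real cluster needed
@mock.patch('CassandraLogging.CassandraRunLog')    # patch the name where it is used
def test_save_called(MockRunLog, mock_setup):
    from CassandraLogging import CassandraHandler

    handler = CassandraHandler('name')
    record = logging.LogRecord(name='pytest', level=logging.INFO,
                               pathname='something', lineno=0,
                               msg='something', args=(), exc_info=None,
                               func='test_save_called')
    handler.emit(record)

    # the handler should have built a model instance and saved it
    MockRunLog.return_value.save.assert_called_once_with()

Note that patching CassandraLoggingModel.CassandraRunLog, as in the original test, has no effect on emit: CassandraLogging imported the class directly and keeps its own reference, so the patch has to target CassandraLogging.CassandraRunLog.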
I've been struggling with this for a couple of months now and have tried a lot of different things to alleviate it, but I'm not sure what to do anymore. All the examples I see differ from what I need, and in my case they just don't work.
To preface the problem, I have processor applications that get spawned by a manager as a docker container. The processor is a single class that gets run in a forever while loop that processes the same list of items over and over again and runs a function on them. The code I'm working with is quite large so I created a smaller version of the problem below.
This is how I create my engine:
db.py
from os import getpid

from pymongo import MongoClient

_mongo_client = None
_mongo_client_pid = None

# Defaults added here so the no-argument call in get_db_engine() works.
def get_mongodb_uri(MONGO_DB_HOST='localhost', MONGO_DB_PORT=27017) -> str:
    return 'mongodb://{}:{}/{}'.format(MONGO_DB_HOST, MONGO_DB_PORT, 'taskprocessor')

def get_db_engine():
    # Rebuild the client whenever the PID changes, so a forked worker
    # never reuses the parent's MongoClient.
    global _mongo_client, _mongo_client_pid
    curr_pid = getpid()
    if curr_pid != _mongo_client_pid:
        _mongo_client = MongoClient(get_mongodb_uri(), connect=False)
        _mongo_client_pid = curr_pid
    return _mongo_client

def get_db(name):
    return get_db_engine()['taskprocessor'][name]
These are my DB models
processor.py
from uuid import uuid4
from taskprocessor.db import get_db

class ProcessorModel():
    db = get_db("processors")

    def __init__(self, **kwargs):
        self.uid = kwargs.get('uid', str(uuid4()))
        self.exceptions = kwargs.get('exceptions', [])
        self.to_process = kwargs.get('to_process', [])
        self.functions = kwargs.get('functions', ["int", "round"])

    def save(self):
        return self.db.insert_one(self.__dict__).inserted_id is not None

    @classmethod
    def get(cls, uid):
        res = cls.db.find_one(dict(uid=uid))
        return ProcessorModel(**res)
result.py
from uuid import uuid4
from taskprocessor.db import get_db

class ResultModel():
    db = get_db("results")

    def __init__(self, **kwargs):
        self.uid = kwargs.get('uid', str(uuid4()))
        self.res = kwargs.get('res', dict())

    def save(self):
        return self.db.insert_one(self.__dict__).inserted_id is not None
And my main.py, which gets started as a Docker container and runs a forever loop:
import os
from time import sleep
from multiprocessing import Pool

from taskprocessor.db.processor import ProcessorModel
from taskprocessor.db.result import ResultModel

class Processor:
    def __init__(self):
        self.id = os.getenv("PROCESSOR_ID")
        self.db_model = ProcessorModel.get(self.id)
        self.to_process = self.db_model.to_process  # list of floats, e.g. [1.23, 1.535, 1.33499, 242.2352, 352.232]
        self.functions = self.db_model.functions    # list, e.g. ["round", "int"]

    def run(self):
        while True:
            try:
                pool = Pool(2)
                res = list(pool.map(self.analyse, self.to_process))
                print(res)
                sleep(100)
            except Exception as e:
                self.db_model = ProcessorModel.get(os.getenv("PROCESSOR_ID"))
                self.db_model.exceptions.append(f"exception {e}")
                self.db_model.save()
                print("Exception")

    def analyse(self, item):
        res = {}
        for func in self.functions:
            if func == "round":
                res['round'] = round(item)
            if func == "int":
                res['int'] = int(item)
        ResultModel(res=res).save()
        return res

if __name__ == "__main__":
    p = Processor()
    p.run()
I've tried setting connect=False, and even closing the connection after configuration, but then I end up with connection-closed errors. I also tried a scheme that recognizes the PID and hands out a different client per process, but that did not help either.
Almost all the examples I can find assume DB access is not needed before the multiprocessing fork. In my case the initial configuration is heavy and would be too expensive to redo on every pass of the processing loop; furthermore, the items to process themselves depend on data from the DB.
I can live with not being able to save the exceptions to the db object from the main PID.
I'm seeing error logs about fork safety, as well as hitting connection-pool-paused errors, as symptoms of this issue.
If anybody else sees this: I was using pymongo 4.0.2, upgraded to 4.3.3, and I am no longer seeing the errors I was seeing before.
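For anyone who cannot simply upgrade: pymongo's documentation is explicit that a MongoClient is not fork-safe, and the supported pattern is to create the client after the fork, for example in a Pool initializer, instead of sharing the parent's client. A rough sketch of that shape, with the URI and names illustrative rather than taken from the code above:

from multiprocessing import Pool

from pymongo import MongoClient

_worker_client = None  # one client per worker process

def _init_worker():
    # Runs once inside each freshly forked worker, so the client is created post-fork.
    global _worker_client
    _worker_client = MongoClient('mongodb://localhost:27017/taskprocessor')

def analyse(item):
    res = {'round': round(item), 'int': int(item)}
    # insert a copy so the returned dict stays free of the generated _id
    _worker_client['taskprocessor']['results'].insert_one(dict(res))
    return res

if __name__ == '__main__':
    with Pool(2, initializer=_init_worker) as pool:
        print(pool.map(analyse, [1.23, 1.535, 242.2352]))

The heavy one-time configuration can still happen in the parent before the fork; only the MongoClient itself has to be per-process.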
I am using the Tornado framework to load my machine learning model. I have a Popularity class:
import numpy as np
import pandas as pd
from pandas import DataFrame
class Popularity():
    users_data = pd.read_csv('~/Desktop/LatentCollaborativeFiltering/lib/seed_data/ratings.csv')
    movies_data = pd.read_csv('~/Desktop/LatentCollaborativeFiltering/lib/seed_data/movies.csv')
    data = pd.merge(users_data, movies_data, left_on="movieId", right_on="movieId")
    data = pd.DataFrame.sort_values(data, ['userId', 'movieId'], ascending=[0, 1])

    def __init__(self):
        pass

    def favoriteMovies(self, activeUser, N):
        topMovies = pd.DataFrame.sort_values(self.data[self.data.userId == activeUser], ['rating'], ascending=[0])[:N]
        # return the title corresponding to the movies in topMovies
        return list(topMovies.title)

    def recommend_movies(self):
        return "No recommendation"
Now I have another file, build_model.py, to pickle an object of this class:
from __future__ import print_function
import os
from sklearn.externals import joblib
import pandas as pd
import numpy as np
from popularity import Popularity

if __name__ == "__main__":
    popu = Popularity()
    _CUR_DIR = os.path.dirname(os.path.realpath(__file__))
    _SERIALIZATION_DIR = os.path.join(_CUR_DIR)
    if not os.path.exists(_SERIALIZATION_DIR):
        os.makedirs(_SERIALIZATION_DIR)
    model_filename = os.path.join(_SERIALIZATION_DIR, "model.pkl")
    joblib.dump(popu, model_filename)
    print("Successfully Built and Pickled into models folder")
This builds the model and successfully saves it as model.pkl in the same directory. But when I load the model in Tornado, it gives me the following error:
[I 180702 06:30:44 server:40] Loading Latent Collaborative Filtering model...
Traceback (most recent call last):
File "run.py", line 7, in <module>
server.main()
File "/home/rabin/Desktop/LatentCollaborativeFiltering/movies-api/app/server.py", line 45, in main
MODELS["recommender"] = pickle.load(infile)
ModuleNotFoundError: No module named 'Popularity'
My server.py file is
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
import logging.config
import pickle

import tornado.ioloop
import tornado.web
from tornado.options import options
from sklearn.externals import joblib

from app.settings import MODEL_DIR, ROOT_DIR, _CUR_DIR
from app.handler import IndexHandler, RecommenderHandler
from app.popularity import Popularity

MODELS = {}

def load_model(pickle_filename):
    return joblib.load(pickle_filename)

def main():
    # Get the port and debug mode from command line options or defaults in settings.py
    options.parse_command_line()

    # create logger for app
    logger = logging.getLogger('app')
    logger.setLevel(logging.INFO)
    FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=FORMAT)

    # Load ML models
    logger.info("Loading Latent Collaborative Filtering model...")
    # MODELS["recommender"] = load_model(os.path.join(_CUR_DIR, 'model.pkl'))
    # MODELS["recommender"] = load_model('model.pkl')
    with open(os.path.join(_CUR_DIR, 'model.pkl'), 'rb') as infile:
        MODELS["recommender"] = pickle.load(infile)

    urls = [
        (r"/$", IndexHandler),
        (r"/api/recommender/(?P<action>[a-zA-Z]+)?", RecommenderHandler,  # action is function in handler
         dict(model=MODELS["recommender"]))
    ]

    # Create Tornado application
    application = tornado.web.Application(
        urls,
        debug=options.debug,
        autoreload=options.debug)

    # Start server
    logger.info("Starting App on Port: {} with Debug Mode: {}".format(options.port, options.debug))
    application.listen(options.port)
    tornado.ioloop.IOLoop.current().start()
And my handler.py file is
"""
Request Handlers
"""
import tornado.web
from tornado import concurrent
from tornado import gen
from concurrent.futures import ThreadPoolExecutor
from app.base_handler import BaseApiHandler
from app.settings import MAX_MODEL_THREAD_POOL
from app.popularity import Popularity
class IndexHandler(tornado.web.RequestHandler):
"""APP is live"""
def get(self):
self.write("Movie Recommendation System is Live")
def head(self):
self.finish()
class RecommenderHandler(BaseApiHandler):
_thread_pool = ThreadPoolExecutor(max_workers=MAX_MODEL_THREAD_POOL)
def initialize(self, model, *args, **kwargs):
self.model = model
super().initialize(*args, **kwargs)
#concurrent.run_on_executor(executor='_thread_pool')
def _blocking_predict(self, X):
target_values = self.model.favoriteMovies(5,10)
return target_values
#gen.coroutine
def predict(self, data):
if type(data) == dict:
data = [data]
X = []
for item in data:
record = (item.get("user_id"))
X.append(record)
results = yield self._blocking_predict(X)
self.respond(results)
I have searched a lot for a solution but have not yet found one that works for me. I cannot load it from the console either.
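One detail that matters when debugging this kind of error: pickle records the defining module path of the class at dump time (here apparently 'Popularity') and re-imports it under exactly that name at load time. If the class has since moved, an alias can make the old name resolvable again; a hedged sketch, assuming the class now lives at app.popularity:

import sys
import pickle

import app.popularity

# Hypothetical workaround: expose the module under the name recorded in the
# pickle so pickle.load can resolve the class again.
sys.modules['Popularity'] = app.popularity

with open('model.pkl', 'rb') as infile:
    model = pickle.load(infile)

The cleaner fix is usually to re-dump the pickle from a script that imports the class from the same module path the server will use.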
I am using radish BDD with Selenium to test my Django app; however, sometimes Django asks whether to delete the test database because it already exists. Here's my terrain.py:
import os

import django
from django.test.runner import DiscoverRunner
from django.test import LiveServerTestCase
from radish import before, after
from selenium import webdriver

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'tangorblog.settings.features')
BASE_URL = os.environ.get('BASE_URL', 'http://localhost:8000')

@before.each_scenario
def setup_django_test(scenario):
    django.setup()
    scenario.context.test_runner = DiscoverRunner()
    scenario.context.test_runner.setup_test_environment()
    scenario.context.old_db_config = \
        scenario.context.test_runner.setup_databases()
    scenario.context.base_url = BASE_URL
    scenario.context.test_case = LiveServerTestCase()
    scenario.context.test_case.setUpClass()
    scenario.context.browser = webdriver.Chrome()

@after.each_scenario
def teardown_django(scenario):
    scenario.context.browser.quit()
    scenario.context.test_case.tearDownClass()
    del scenario.context.test_case
    scenario.context.test_runner.teardown_databases(
        scenario.context.old_db_config)
    scenario.context.test_runner.teardown_test_environment()
I think I could somehow alter this part:
scenario.context.old_db_config = \
    scenario.context.test_runner.setup_databases()
But I don't know how. Any help?
It seems to me that recreating the database for every scenario would end up being highly inefficient (and super slow). It should only be necessary to create the database once per test run and then drop it at the end.
I've come up with a solution I think integrates better with Django. It allows you to run the tests with manage.py test, only creates/drops the database once per test run, and clears the database tables after every feature is tested.
Note that this runs both the Django unit tests and radish tests by default. To run just the radish tests, you can do RADISH_ONLY=1 manage.py test. Also, for the live server/Selenium tests to work, you have to run manage.py collectstatic first.
# package/settings.py
TEST_RUNNER = 'package.test.runner.RadishTestRunner'
# package/test/runner.py
import os

from django.test.runner import DiscoverRunner

import radish.main

class RadishTestRunner(DiscoverRunner):

    def run_suite(self, suite, **kwargs):
        # Run unit tests
        if os.getenv('RADISH_ONLY') == '1':
            result = None
        else:
            result = super().run_suite(suite, **kwargs)
        # Run radish behavioral tests
        self._radish_result = radish.main.main(['features'])
        return result

    def suite_result(self, suite, result, **kwargs):
        if result is not None:
            # Django unit tests were run
            result = super().suite_result(suite, result, **kwargs)
        else:
            result = 0
        result += self._radish_result
        return result
# radish/world.py
from django.db import connections
from django.test.testcases import LiveServerThread, _StaticFilesHandler
from django.test.utils import modify_settings
from radish import pick
from selenium import webdriver

@pick
def get_browser():
    return webdriver.Chrome()

@pick
def get_live_server():
    live_server = LiveServer()
    live_server.start()
    return live_server

class LiveServer:

    host = 'localhost'
    port = 0
    server_thread_class = LiveServerThread
    static_handler = _StaticFilesHandler

    def __init__(self):
        connections_override = {}
        for conn in connections.all():
            if conn.vendor == 'sqlite' and conn.is_in_memory_db():
                conn.allow_thread_sharing = True
                connections_override[conn.alias] = conn
        self.modified_settings = modify_settings(ALLOWED_HOSTS={'append': self.host})
        self.server_thread = self.server_thread_class(
            self.host,
            self.static_handler,
            connections_override=connections_override,
            port=self.port,
        )
        self.server_thread.daemon = True

    @property
    def url(self):
        self.server_thread.is_ready.wait()
        return 'http://{self.host}:{self.server_thread.port}'.format(self=self)

    def start(self):
        self.modified_settings.enable()
        self.server_thread.start()
        self.server_thread.is_ready.wait()
        if self.server_thread.error:
            self.stop()
            raise self.server_thread.error

    def stop(self):
        if hasattr(self, 'server_thread'):
            self.server_thread.terminate()
        for conn in connections.all():
            if conn.vendor == 'sqlite' and conn.is_in_memory_db():
                conn.allow_thread_sharing = False
        self.modified_settings.disable()
# radish/terrain.py
from django.db import connections, transaction
from radish import world, before, after

@before.all
def set_up(features, marker):
    world.get_live_server()

@after.all
def tear_down(features, marker):
    browser = world.get_browser()
    live_server = world.get_live_server()
    browser.quit()
    live_server.stop()

@before.each_scenario
def set_up_scenario(scenario):
    live_server = world.get_live_server()
    scenario.context.base_url = live_server.url
    scenario.context.browser = world.get_browser()
    # XXX: Only works with the default database
    # XXX: Assumes the default database supports transactions
    scenario.context.transaction = transaction.atomic(using='default')
    scenario.context.transaction.__enter__()

@after.each_scenario
def tear_down_scenario(scenario):
    transaction.set_rollback(True, using='default')
    scenario.context.transaction.__exit__(None, None, None)
    for connection in connections.all():
        connection.close()
@Wyatt, again I'm just going to modify your answer. I tried running your solution, but it didn't manage to make each scenario independent, and I even encountered an IntegrityError when I tried to create a model object inside a scenario. Regardless, I still use your solution (especially RadishTestRunner, as the idea comes from you), though I modified it so I could run the Django unit tests separately from radish. I use LiveServerTestCase directly and removed LiveServer, as I noticed the similarity between the two, except that LiveServerTestCase inherits from TransactionTestCase and also has LiveServerThread and _StaticFilesHandler built in. Here's how it looks:
# package/test/runner.py
from django.test.runner import DiscoverRunner

import radish.main

class RadishTestRunner(DiscoverRunner):
    radish_features = ['features']

    def run_suite(self, suite, **kwargs):
        # run the radish behavioral tests
        return radish.main.main(self.radish_features)

    def suite_result(self, suite, result, **kwargs):
        return result

    def set_radish_features(self, features):
        self.radish_features = features
# radish/world.py
from django.test import LiveServerTestCase
from radish import pick
from selenium import webdriver

@pick
def get_browser():
    return webdriver.Chrome()

@pick
def get_live_server():
    live_server = LiveServerTestCase
    live_server.setUpClass()
    return live_server
# radish/terrain.py
from radish import world, before, after
from selenium import webdriver

@before.all
def set_up(features, marker):
    world.get_live_server()

@after.all
def tear_down(features, marker):
    live_server = world.get_live_server()
    live_server.tearDownClass()

@before.each_scenario
def set_up_scenario(scenario):
    live_server = world.get_live_server()
    scenario.context.browser = webdriver.Chrome()
    scenario.context.base_url = live_server.live_server_url
    scenario.context.test_case = live_server()
    scenario.context.test_case._pre_setup()

@after.each_scenario
def tear_down_scenario(scenario):
    scenario.context.test_case._post_teardown()
    scenario.context.browser.quit()
That's it. This also fixes the problem with PostgreSQL from my other question that you pointed out. I also open and quit the browser on each scenario, as it gives me more control over the browser inside a scenario. Thank you so much for your effort in pointing me in the right direction.
Finally, I returned to PostgreSQL. PostgreSQL seems to be faster than MySQL for this; it greatly reduced the time it takes to run the tests.
And oh yes, I needed to run ./manage.py collectstatic first, after specifying STATIC_ROOT in the Django settings file.
I also modified RadishTestRunner so that, instead of running with RADISH_ONLY=1, I can run it with python manage.py radish /path/to/features/file. Here's my radish command:
# package/management/commands/radish.py
from __future__ import absolute_import

import sys

from django.core.management.base import BaseCommand, CommandError

from package.test.runner import RadishTestRunner

class Command(BaseCommand):

    def add_arguments(self, parser):
        parser.add_argument('features', nargs='+', type=str)

    def handle(self, *args, **options):
        test_runner = RadishTestRunner(interactive=False)
        if options['features']:
            test_runner.set_radish_features(options['features'])
        result = test_runner.run_suite(None)
        if result:
            sys.exit(result)
By using radish through a Django management command, we have control over which feature files we want to run.
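Since add_arguments declares features with nargs='+', one run can take one or several feature files; the paths here are illustrative:

python manage.py radish features/login.feature features/comments.feature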
I'm trying to set up some tests for my server.
On a friend's recommendation I want to use unittest and mock. For this purpose I wrote some lines, but when I try to execute the script I get the error below.
Traceback (most recent call last):
File "test_creds.py", line 146, in <module>
unittest.main()
AttributeError: 'module' object has no attribute 'main'
Is there some mistake in my imports?
Thanks!
# coding=utf-8
import sys, os
import unittest, mock, kiss
from twisted.python import log
from twisted.trial import unittest
from twisted.cred import credentials

class CredentialsChecker(unittest.TestCase):
    """
    Testing the server credentials handling
    """

    def _setUp_databases(self):
        username_1 = 'user_name'
        password_1 = 'user_pass'
        email_1 = 'user@mail.org'
        create_user_profile = mock.Mock()
        self.db = mock.Mock()
        self.db.username = username_1
        self.db.password = password_1
        self.db.email = email_1

    def setUp(self):
        log.startLogging(sys.stdout)
        log.msg(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Flushing database")
        # management.execute_from_command_line(['manage.py', 'flush',
        #                                       '--noinput'])
        log.msg(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Populating database")
        # management.execute_from_command_line(['manage.py', 'createsuperuser',
        #                                       '--username', 'superuser_name',
        #                                       '--email', 'superuser_name@email.org',
        #                                       '--noinput'])
        self._setUp_databases()
        log.msg(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Running tests")
        return

    """
    Log in with valid credentials. The server should return True.
    """
    def test_GoodCredentials(self):
        creds = credentials.UsernamePassword('user_name', 'user_pass')
        checker = DjangoAuthChecker()
        d = checker.requestAvatarId(creds)

if __name__ == '__main__':
    unittest.main()
If you create a test case inheriting from unittest.TestCase, you have to use the native Python unittest module. In the code above, from twisted.trial import unittest rebinds the name unittest to Twisted's trial module, which has no main() function, hence the AttributeError.
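A minimal sketch of one fix is to import the two modules under distinct names, so the stdlib runner stays reachable:

import unittest                                       # stdlib: provides unittest.main()
from twisted.trial import unittest as trial_unittest  # trial's flavor, kept separate

class CredentialsChecker(trial_unittest.TestCase):
    def test_good_credentials(self):
        pass  # real assertions go here

if __name__ == '__main__':
    unittest.main()  # now resolves to the stdlib runner

Alternatively, if the tests lean on trial's machinery, drop the __main__ block and run them with trial itself (trial test_creds).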
I don't understand how to filter on structured properties. I keep getting the error:
BadFilterError: invalid filter: Cannot query for unindexed property author.email.
I'm attempting to create an entity in my test setUp.
I'm using the code from the GAE tutorial.
Here is the model:
class Author(ndb.Model):
    """Sub model for representing an author."""
    identity = ndb.StringProperty(indexed=False)
    email = ndb.StringProperty(indexed=True)

class Greeting(ndb.Model):
    """A main model for representing an individual Guestbook entry."""
    author = ndb.StructuredProperty(Author)
    content = ndb.StringProperty(indexed=False)
    date = ndb.DateTimeProperty(auto_now_add=True)
Here is the simple query:
Author.query(Author.email == "bryan@mail.com").get()
Here is my testing code in functional_tests.py:
import sys, os, subprocess, time, unittest, shlex
import logging

sys.path.append("/usr/local/google_appengine")
sys.path.append("/usr/local/google_appengine/lib/yaml/lib")
sys.path.append("/usr/local/google_appengine/lib/webapp2-2.5.2")
sys.path.append("/usr/local/google_appengine/lib/django-1.5")
sys.path.append("/usr/local/google_appengine/lib/cherrypy")
sys.path.append("/usr/local/google_appengine/lib/concurrent")
sys.path.append("/usr/local/google_appengine/lib/docker")
sys.path.append("/usr/local/google_appengine/lib/requests")
sys.path.append("/usr/local/google_appengine/lib/websocket")
sys.path.append("/usr/local/google_appengine/lib/fancy_urllib")
sys.path.append("/usr/local/google_appengine/lib/antlr3")

from selenium import webdriver
from google.appengine.api import memcache, apiproxy_stub, apiproxy_stub_map
from google.appengine.ext import db
from google.appengine.ext import testbed
from google.appengine.datastore import datastore_stub_util
from google.appengine.tools.devappserver2 import devappserver2

class NewVisitorTest(unittest.TestCase):

    def setUp(self):
        # Start the dev server
        cmd = "/usr/local/bin/dev_appserver.py /Users/Bryan/work/GoogleAppEngine/guestbook/app.yaml --port 8080 --storage_path /tmp/datastore --clear_datastore --skip_sdk_update_check"
        self.dev_appserver = subprocess.Popen(shlex.split(cmd),
                                              stdout=subprocess.PIPE)
        time.sleep(2)  # Important, let dev_appserver start up

        self.testbed = testbed.Testbed()
        self.testbed.setup_env(app_id='dermal')
        self.testbed.activate()
        self.testbed.init_user_stub()

        # Create a consistency policy that will simulate the High Replication consistency model.
        # With a probability of 1, the datastore should be available.
        self.policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=1)

        # Initialize the datastore stub with this policy.
        self.testbed.init_datastore_v3_stub(datastore_file="/tmp/datastore/datastore.db",
                                            use_sqlite=True,
                                            consistency_policy=self.policy)
        self.testbed.init_memcache_stub()
        self.datastore_stub = apiproxy_stub_map.apiproxy.GetStub('datastore_v3')

        # setup the dev_appserver
        APP_CONFIGS = ['app.yaml']

        # setup client to make sure
        from guestbook import Author, Greeting
        if not Author.query(Author.email == "bryan@mail.com").get():
            logging.info("create Admin")
            client = Author(
                email="bryan@mail.com",
            ).put()
        assert Author.query(Author.email == "bryan@mail.com").get()

        self.browser = webdriver.Firefox()
        self.browser.implicitly_wait(3)

    def tearDown(self):
        self.browser.quit()
        self.testbed.deactivate()
        self.dev_appserver.terminate()

    def test_submit_anon_greeting(self):
        self.browser.get('http://localhost:8080')
        self.browser.find_element_by_name('content').send_keys('Anonymous test post')
        self.browser.find_element_by_name('submit').submit()
        assert 'Anonymous test post' in self.browser.page_source
You're querying the Author model. But in your structure, Authors only exist as part of a Greeting. So you should be creating a Greeting and querying that model.
Greeting(
    author=Author(email="bryan@mail.com"),
    content="Hello there!"
).put()

Greeting.query(Greeting.author.email == "bryan@mail.com").get()
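A short usage sketch tying it together, assuming the testbed datastore stub from the question is active; the filter on the structured property path works because email is declared indexed=True on Author:

greeting = Greeting(author=Author(email="bryan@mail.com"),
                    content="Hello there!")
greeting.put()

match = Greeting.query(Greeting.author.email == "bryan@mail.com").get()
assert match is not None
assert match.content == "Hello there!"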