Pytest mocking nested object - python

I am trying to understand how to mock an attribute that comes from an imported module and is set inside a constructor.
I have a simplified Tableau Python class, defined like this:
import tableauserverclient as TSC
import pandas as pd
from os import environ


class Tableau(object):
    def __init__(self):
        # BYPASS ALL OF THIS AUTHENTICATION?
        self.server = TSC.Server(environ.get("TABLEAU_URL"), use_server_version=False)
        self.server.version = environ.get("TABLEAU_API_VERSION")
        self.tableau_auth = TSC.PersonalAccessTokenAuth(
            environ.get("TABLEAU_TOKEN_NAME"),
            environ.get("TABLEAU_TOKEN_VALUE"),
            site_id="",
        )
        self.tableau_server = self.server.auth.sign_in(self.tableau_auth)

    def get_all_views(self) -> pd.DataFrame:
        data = []
        # HOW TO MOCK self.tableau_server.views?
        for view in TSC.Pager(self.tableau_server.views):
            data.append([view.name, view.id, view.workbook_id])
        df = pd.DataFrame(data, columns=["View", "Id", "Workbook_Id"])
        return df
How can I mock the output of self.tableau_server.views in get_all_views() from pytest so that it returns a mocked list of views like this:
[
    (id=1, name="view_a", workbook_id=1),
    (id=2, name="view_b", workbook_id=2),
    (id=3, name="view_c", workbook_id=3),
]
Note: the return value needs to be iterable.
Here's what I tried so far. I have been running into "module not found" errors and errors from within the constructor, so I think the mocking is not set up correctly.
from pytest_mock import mocker
from connectors import Tableau
import tableauserverclient as TSC
import pandas as pd
from pandas.testing import assert_frame_equal


def get_mocked_tableau_views(mocker):
    a = mocker.Mock()
    a.name = "a"
    a.id = 1
    a.workbook_id = 1
    b = mocker.Mock()
    b.name = "b"
    b.id = 2
    b.workbook_id = 2
    c = mocker.Mock()
    c.name = "c"
    c.id = 3
    c.workbook_id = 3
    return mocker.Mock(return_value=iter([a, b, c]))


def test_initialize(mocker):
    mocker.patch.object(TSC, "__init__", return_value=None)
    mocker.patch.object(TSC.__init__, "server", return_value=None)
    mocker.patch.object(TSC.__init__, "server.version", return_value=None)


def test_get_all_views(mocker):
    mocked_tsc = mocker.MagicMock()
    mocked_tsc.Server.auth.sign_in = "test"
    with mocker.patch(
        "connectors.tableau.Tableau.tableau_server.views",
        return_value=get_mocked_tableau_views,
    ):
        tab = Tableau()
        df_actual = tab.get_all_views()
        df_expected = pd.DataFrame(
            {"Id": [1, 2, 3], "View": ["a", "b", "c"], "Workbook_Id": [1, 2, 3]}
        )
        assert_frame_equal(df_actual, df_expected)
Thanks in advance!

I'll try to answer the question
# HOW TO MOCK self.tableau_server.views?
posed in the get_all_views() method of the Tableau class.
I don't use pytest here, only the unittest.mock module.
import unittest
from unittest import mock

import pandas as pd
from pandas.testing import assert_frame_equal

from connectors import Tableau


def get_mocked_tableau_views():
    a = mock.Mock()
    a.name = "a"
    a.id = 1
    a.workbook_id = 1
    b = mock.Mock()
    b.name = "b"
    b.id = 2
    b.workbook_id = 2
    c = mock.Mock()
    c.name = "c"
    c.id = 3
    c.workbook_id = 3
    return iter([a, b, c])


class MyTestCase(unittest.TestCase):
    def test_get_all_views(self):
        tab = Tableau()
        with mock.patch.object(tab, 'tableau_server') as mock_tableau_server:
            # get_mocked_tableau_views is the module-level function above,
            # not a method, so no self. prefix here
            mock_tableau_server.views.side_effect = get_mocked_tableau_views
            df_actual = tab.get_all_views()
            df_expected = pd.DataFrame({"Id": [1, 2, 3], "View": ["a", "b", "c"], "Workbook_Id": [1, 2, 3]})
            # assertEqual raises "truth value is ambiguous" on DataFrames,
            # so use pandas' own comparison helper instead
            assert_frame_equal(df_actual, df_expected)
You will probably have to adjust my code a bit to run it on your system. I hope this answer is useful to you.
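If you want to stay in pytest with pytest-mock, a minimal sketch of the same idea could look like the following. It patches the TSC names used inside Tableau.__init__ so no real sign-in happens, and replaces TSC.Pager so get_all_views() iterates fake views. The "connectors.tableau" patch target is an assumption taken from the question's own patch string and may need adjusting:

import pandas as pd
from pandas.testing import assert_frame_equal
from connectors import Tableau


def fake_view(mocker, name, id_, workbook_id):
    # .name is special in Mock's constructor, so set it after creation
    view = mocker.Mock()
    view.name = name
    view.id = id_
    view.workbook_id = workbook_id
    return view


def test_get_all_views(mocker):
    # neutralize everything the constructor touches, so no authentication happens
    mocker.patch("connectors.tableau.TSC.Server")
    mocker.patch("connectors.tableau.TSC.PersonalAccessTokenAuth")
    views = [fake_view(mocker, n, i, i) for i, n in enumerate("abc", start=1)]
    # Pager(...) now returns our fake views regardless of its argument
    mocker.patch("connectors.tableau.TSC.Pager", return_value=iter(views))
    df_actual = Tableau().get_all_views()
    df_expected = pd.DataFrame(
        {"View": ["a", "b", "c"], "Id": [1, 2, 3], "Workbook_Id": [1, 2, 3]}
    )
    assert_frame_equal(df_actual, df_expected)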

Related

How to use mocking to compare real results

I have a sample class which reads a saved TensorFlow model and runs predictions:
class Sample():
    ## all it does is create a new column with predictions
    def __init__(self, tf_model):
        self.tf_model = tf_model

    def tf_process(self, x):
        ## some other preprocessing
        x["tf_predictions"] = self.tf_model.predict(x)
        return x

    def predict(self, x):
        predictions = self.tf_process(x)
        return predictions
Code for unit testing without having to load the model:
import unittest
import pandas as pd
from unittest import TestCase, mock
from my_package.sample_model import Sample


class TestSample(unittest.TestCase):
    def test_predict(self):
        with mock.patch("Sample.tf_process") as process:
            process.return_value = pd.DataFrame("hardcoded_value")
            # to check: process.return_value = Output (Sample.predict())
Goal:
To compare process.return_value with the output of the predict method in Sample. But to do this I still have to load the model, and I don't understand the use of mock here, since I will have to call the predict method anyway to compare it with process.return_value. Any suggestions would be helpful.
I think in your case it's better to use Mock(). You can write really good, simple tests without patch(); just prepare all the necessary mocked instances for initialization.
import pandas as pd
from unittest import TestCase
from unittest.mock import Mock

from my_package.sample_model import Sample


class TestSample(TestCase):
    def test_predict(self):
        # let's say predict() will return something... just an example
        tf = Mock(predict=Mock(return_value=(10, 20, 30)))
        df = pd.DataFrame({'test_col': (1, 2, 3)})
        df = Sample(tf).predict(df)
        # check column
        self.assertTrue('tf_predictions' in df.columns)
        # or check records
        self.assertEqual(
            df.to_dict('records'),
            [
                {'test_col': 1, 'tf_predictions': 10},
                {'test_col': 2, 'tf_predictions': 20},
                {'test_col': 3, 'tf_predictions': 30}
            ]
        )
This approach also really helps when you need to test complex services. Just an example:
class ClusterService:
    def __init__(self, service_a, service_b, service_c) -> None:
        self._service_a = service_a
        self._service_b = service_b
        self._service_c = service_c
        # service_d, ... etc

    def get_cluster_info(self, name: str):
        self._service_a.send_something_to_somewhere(name)
        data = {
            'name': name,
            'free_resources': self._service_b.get_free_resources(),
            'current_price': self._service_c.get_price(name),
        }
        return ' ,'.join([
            ': '.join(['Cluster name', name]),
            ': '.join(['CPU', str(data['free_resources']['cpu'])]),
            ': '.join(['RAM', str(data['free_resources']['ram'])]),
            ': '.join(['Price', '{} $'.format(round(data['current_price']['usd'], 2))]),
        ])


class TestClusterService(TestCase):
    def test_get_cluster_info(self):
        cluster = ClusterService(
            service_a=Mock(),
            service_b=Mock(get_free_resources=Mock(return_value={'cpu': 100, 'ram': 200})),
            service_c=Mock(get_price=Mock(return_value={'usd': 101.4999})),
        )
        self.assertEqual(
            cluster.get_cluster_info('best name'),
            'Cluster name: best name ,CPU: 100 ,RAM: 200 ,Price: 101.5 $'
        )

Running Python Script From C# and Working With the Results

I need to run a Python script from a .NET Core web API repository to get the current user's cluster and recommend a list of courses based on their ratings. (I'm implementing a collaborative filtering recommender system and use sklearn's NearestNeighbors to help with user clustering.)
List Course Repository
public async Task<IEnumerable<Course>> GetList(int? userId, string name, ContextSession session, bool includeDeleted = false)
{
    var entity = GetEntities(session, includeDeleted).AsQueryable();
    var courses = await entity.Where(obj => obj.Id > 0).ToListAsync();
    SaveToCsv<Course>(courses, Environment.CurrentDirectory + "/csv/course.csv");

    var ratings = _dbContext.Set<CourseRating>().AsQueryable();
    if (!includeDeleted)
    {
        ratings = ratings.Where(obj => !obj.IsDeleted);
    }
    var i = await ratings.Where(obj => obj.Id > 0).ToListAsync();
    SaveToCsv<CourseRating>(i, Environment.CurrentDirectory + "/csv/ratings_Final.csv");

    ScriptRuntimeSetup setup = Python.CreateRuntimeSetup(null);
    ScriptRuntime runtime = new ScriptRuntime(setup);
    ScriptEngine engine = Python.GetEngine(runtime);
    ScriptSource source = engine.CreateScriptSourceFromFile(Environment.CurrentDirectory + "/CB_RS.py");
    ScriptScope scope = engine.CreateScope();

    List<String> argv = new List<String>();
    argv.Add(session.UserId.ToString());
    argv.Add(Environment.CurrentDirectory + "/csv/course.csv");
    argv.Add(Environment.CurrentDirectory + "/csv/ratings_Final.csv");
    engine.GetSysModule().SetVariable("argv", argv);

    try
    {
        source.Execute(scope);
    }
    catch (Exception ex)
    {
        var error = ex;
    }

    return await entity.Where(obj => obj.Id > 0).ToListAsync();
}
The Python Script
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import operator


def get_rating_course_user(courses_path, ratings_path):
    course_df = pd.read_csv(courses_path)
    rating_df = pd.read_csv(ratings_path)
    # drop useless columns
    # course_df.drop(columns_list, axis=1)
    # rating_df.drop([columns_list], axis=1)
    # merge the 2 datasets on courseId
    df = rating_df.merge(course_df, on='courseId')
    # get the ratings per course per user
    pvt = df.pivot_table(index='userId', columns='name', values='rating')
    # convert all NaN to -1
    pvt.fillna(-1, inplace=True)
    # convert the pivot table to a sparse matrix
    spm = csr_matrix(pvt)
    return spm, pvt


# returns a list of recommended courses according to similar users' ratings
def make_prediction(userId, courses_path, ratings_path, n_neighbors):
    recommended = []
    spm, pvt = get_rating_course_user(courses_path, ratings_path)
    # creating and training the model
    nn = NearestNeighbors(algorithm='brute')
    nn.fit(spm)
    # testing the model
    test = pvt.iloc[userId, :].values.reshape(1, len(pvt.columns))
    distance, suggested = nn.kneighbors(test, n_neighbors=n_neighbors)
    for i in range(len(suggested[0])):
        x = pvt.iloc[suggested[0][i], :].values.reshape(1, len(pvt.columns))
        result = np.where(x >= 3)
        for j in range(len(result[1])):
            course = pvt.iloc[:, result[1][j]].name
            if course not in recommended:
                recommended.append(course)
    return recommended


# returns a dictionary {id_course: average_rating} of the k courses with the best average_rating
def recommend_K_best_courses(recommendedList, courses_path, k):
    courses = pd.read_csv(courses_path)
    courseIds = {}
    for i in recommendedList:
        ID = (courses.loc[courses.name == i].courseId).to_string(index=False)
        rate = (courses.loc[courses.name == i].average).to_string(index=False)
        courseIds[int(ID)] = float(rate)
    sort = dict(sorted(courseIds.items(), key=operator.itemgetter(1), reverse=True)[:k])
    return sort


def main(userId, courses_path, ratings_path, n_neighbors=7, k=10){
    recommended_list = make_prediction(userId, courses_path, ratings_path, n_neighbors)
    return recommend_K_best_courses(recommended_list, courses_path, k)
}


if __name__ == "__main__":
    main(userId, courses_path, ratings_path, n_neighbors=7, k=10)
I got the following error:
unexpected token '{'
My current problem is running the Python script and then using the result returned by the script's main method.
I made a C# developer's slip: I opened the method main with '{' instead of ':'.
def main(userId, courses_path, ratings_path, n_neighbors=7, k=10):
    recommended_list = make_prediction(userId, courses_path, ratings_path, n_neighbors)
    return recommend_K_best_courses(recommended_list, courses_path, k)
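As for using the script's result from C#: one option (a sketch, untested against your setup) is to bind the return value of main to a module-level variable at the end of the script and read it back from the ScriptScope with scope.GetVariable("result") after source.Execute(scope). Note that the if __name__ == "__main__" guard may not fire under the hosting API, so this sketch calls main unconditionally:

import sys

# sys.argv here is the List<String> the C# host pushed in via
# engine.GetSysModule().SetVariable("argv", argv):
# [userId, coursesPath, ratingsPath]
user_id = int(sys.argv[0])
courses_path = sys.argv[1]
ratings_path = sys.argv[2]

# module-level name that the host can read back with scope.GetVariable("result")
result = main(user_id, courses_path, ratings_path, n_neighbors=7, k=10)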

Error "AttributeError: 'Py4JError' object has no attribute 'message'" building DecisionTreeModel

I'm following Chapter 4 from "Advanced Analytics with Spark" from O'Reilly. This book is in Scala and I'm having trouble converting this code to Python.
Scala Code
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.regression._
val rawData = sc.textFile("hdfs:///user/ds/covtype.data")
val data = rawData.map { line =>
val values = line.split(',').map(_.toDouble)
val featureVector = Vectors.dense(values.init)
val label = values.last - 1
LabeledPoint(label, featureVector)
}
val Array(trainData, cvData, testData) =
data.randomSplit(Array(0.8, 0.1, 0.1))
trainData.cache()
cvData.cache()
testData.cache()
import org.apache.spark.mllib.evaluation._
import org.apache.spark.mllib.tree._
import org.apache.spark.mllib.tree.model._
import org.apache.spark.rdd._
def getMetrics(model: DecisionTreeModel, data: RDD[LabeledPoint]):
MulticlassMetrics = {
val predictionsAndLabels = data.map(example =>
(model.predict(example.features), example.label)
)
new MulticlassMetrics(predictionsAndLabels)
}
val model = DecisionTree.trainClassifier(
trainData, 7, Map[Int,Int](), "gini", 4, 100)
val metrics = getMetrics(model, cvData)
metrics.confusionMatrix
My Python Code
from pyspark.sql.functions import col, split
import pyspark.mllib.linalg as linal
import pyspark.mllib.regression as regre
import pyspark.mllib.evaluation as eva
import pyspark.mllib.tree as tree
import pyspark.rdd as rd

raw_data = sc.textFile("covtype.data")

def fstDecisionTree(line):
    values = list(map(float, line.split(",")))
    featureVector = linal.Vectors.dense(values[:-1])
    label = values[-1] - 1
    return regre.LabeledPoint(label, featureVector)

data = raw_data.map(fstDecisionTree)
trainData, cvData, testData = data.randomSplit([0.8, 0.1, 0.1])
trainData.cache()
cvData.cache()
testData.cache()

def help_lam(model):
    def _help_lam(dataline):
        print(dataline)
        a = dataline.collect()
        return (model.predict(a[1]), a[0])
    return _help_lam

def getMetrics(model, data):
    print(type(model), type(data))
    predictionsAndLabels = data.map(help_lam(model))
    return eva.MulticlassMetrics(predictionsAndLabels)

n_targets = 7
max_depth = 4
max_bin_count = 100
model = tree.DecisionTree.trainClassifier(trainData, n_targets, {}, "gini", max_depth, max_bin_count)
metrics = getMetrics(model, cvData)
When I run this, I get the error below from _help_lam(dataline) inside help_lam(model), when the map transformation invokes it:
AttributeError: 'Py4JError' object has no attribute 'message'
I think the problem is the model.predict call. From pyspark's mllib/tree.py:
Note: In Python, predict cannot currently be used within an RDD
transformation or action.
Call predict directly on the RDD instead.
What you can do is pass the feature vectors directly, like so:
>>> rdd = sc.parallelize([[1.0], [0.0]])
>>> model.predict(rdd).collect()
[1.0, 0.0]
Edit:
An updated getMetrics could be:
def getMetrics(model, data):
    labels = data.map(lambda d: d.label)
    features = data.map(lambda d: d.features)
    predictions = model.predict(features)
    predictionsAndLabels = predictions.zip(labels)
    return eva.MulticlassMetrics(predictionsAndLabels)
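With that version, the call site stays the same as in your code. For parity with the Scala snippet's metrics.confusionMatrix, note that in the Python API confusionMatrix() is a method:

metrics = getMetrics(model, cvData)
print(metrics.confusionMatrix())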

RectilinearGridSource from tvtk.RectilinearGrid()

I am trying to construct a tvtk.RectilinearGridSource from a tvtk.RectilinearGrid object, in order to add it to the mayavi.engine.
I used to do this:
import mayavi.mlab as mlab
from tvtk.api import tvtk

r = tvtk.RectilinearGrid()
r.point_data.scalars = data.ravel()
r.point_data.scalars.name = 'Temperature'
d = mlab.pipeline.add_dataset(r)
but instead I would prefer to call it this way:
from mayavi.api import Engine
e = Engine()
e.start()
s = e.new_scene()
src = tvtk.RectilinearGridSource()
and then link src with r, i.e., with the RectilinearGrid defined before.
Is there any way to do this?
I've found an answer:
from mayavi.sources.vtk_data_source import VTKDataSource

r = tvtk.RectilinearGrid()
r.point_data.scalars = data.ravel()
r.point_data.scalars.name = 'Temperature'

src = VTKDataSource(data=r)
e.add_source(src)  # src, not d: add the source created above
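For completeness, here is a hedged end-to-end sketch of that approach; the random data array and the Surface module are stand-ins, since the snippets above don't show where data comes from or which module is wanted:

import numpy as np
from mayavi.api import Engine
from mayavi.sources.vtk_data_source import VTKDataSource
from mayavi.modules.surface import Surface
from tvtk.api import tvtk

e = Engine()
e.start()
e.new_scene()

# stand-in data; in the question this array comes from elsewhere
data = np.random.random((10, 10, 10))

r = tvtk.RectilinearGrid()
r.dimensions = data.shape                   # grid shape must match the data
r.x_coordinates = np.arange(data.shape[0])
r.y_coordinates = np.arange(data.shape[1])
r.z_coordinates = np.arange(data.shape[2])
r.point_data.scalars = data.ravel()
r.point_data.scalars.name = 'Temperature'

src = VTKDataSource(data=r)
e.add_source(src)        # this is the link between r and the engine
e.add_module(Surface())  # any module works; Surface is just an example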

AlwaysError when running a testbench on a synchronizer

I encountered this error when running a testbench on a synchronizer built from two existing D-FFs.
File "/home/runner/design.py", line 28, in Sync
    @always_seq(clk.posedge, reset=reset)
File "/usr/share/myhdl-0.8/lib/python/myhdl/_always_seq.py", line 76, in _always_seq_decorator
    raise AlwaysSeqError(_error.ArgType)
myhdl.AlwaysError: decorated object should be a classic (non-generator) function
My testbench is outlined as follows:
from myhdl import *
from random import randrange

HALF_PERIOD = delay(10)  ### This makes a 20-ns clock signal
ACTIVE_HIGH = 1
G_DELAY = delay(15)

def Main():
    ### Signal declaration
    clk, d, dout = [Signal(intbv(0)) for i in range(3)]
    reset = ResetSignal(1, active=ACTIVE_HIGH, async=True)

    ### Module instantiation
    S1 = Sync(dout, d, clk, reset)

    ### Clk generator
    @always(HALF_PERIOD)
    def ClkGen():
        clk.next = not clk

    ### TB def
    @instance
    def Driver():
        yield(HALF_PERIOD)
        reset.next = 0
        for i in range(4):
            yield(G_DELAY)
            d.next = not d
        raise StopSimulation

    return ClkGen, Driver, S1

m1 = traceSignals(Main)
sim = Simulation(m1)
sim.run()
And my synchronizer is coded as follows.
from myhdl import *
from DFF import *

def Sync(dout, din, clk, reset):
    """ The module consists of two FFs with one internal signal
    External signals
        dout : output
        din  : input
        clk  : input
    Internal signal:
        F2F : output-to-input signal that connects the two FFs together
    """
    ### Connectivity
    F2F = Signal(intbv(0))
    F1 = DFF(F2F, din, clk, reset)
    F2 = DFF(dout, F2F, clk, reset)

    ### Function
    @always_seq(clk.posedge, reset=reset)
    def SyncLogic():
        if reset:
            F2F.next = 0
            dout.next = 0
        else:
            F2F.next = din
            yield(WIRE_DELAY)
            dout.next = F2F

    return SyncLogic
and the FF prototype is coded as follows.
from myhdl import *

def DFF(dout, din, clk, reset):
    @always_seq(clk.posedge, reset=reset)
    def Flogic():
        if reset:
            dout.next = 0
        else:
            dout.next = din
    return Flogic
A similar testbench I coded earlier did work (with slight modifications), but it stopped working when the two modules were combined. Please clarify. Thank you.
To model a wire delay, use the "delay" argument of the Signal. Change
@always_seq(clk.posedge, reset=reset)
def SyncLogic():
    if reset:
        F2F.next = 0
        dout.next = 0
    else:
        F2F.next = din
        yield(WIRE_DELAY)
        dout.next = F2F
return SyncLogic
to:
dout = Signal(<type>, delay=WIRE_DELAY)

# ...

@always_seq(clk.posedge, reset=reset)
def synclogic():
    dout.next = din
With always_seq, don't code the reset behavior explicitly (it is added automatically). If you want to handle the reset yourself, use @always(clock.posedge, reset.negedge) instead.
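Putting the pieces together, a minimal sketch of the corrected synchronizer could look like this; WIRE_DELAY = 5 is an assumed value, since the original constant is never shown:

from myhdl import Signal, intbv, always_seq

WIRE_DELAY = 5  # assumption: the original value is not shown


def Sync(dout, din, clk, reset):
    # the wire delay is modeled on the signal itself, not with a yield
    F2F = Signal(intbv(0), delay=WIRE_DELAY)

    @always_seq(clk.posedge, reset=reset)
    def SyncLogic():
        # no explicit reset branch: always_seq adds the reset behavior
        F2F.next = din
        dout.next = F2F

    return SyncLogic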
