I am trying to use Great Expectations. The function I want to use is "expect_compound_columns_to_be_unique".
This is the code (the main template code):
import datetime
import pandas as pd
import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError
context = ge.data_context.DataContext()
# Note that if you modify this batch request, you may save the new version as a .json file
# to pass in later via the --batch-request option
batch_request = {'datasource_name': 'impala_okh', 'data_connector_name': 'default_inferred_data_connector_name', 'data_asset_name': 'okh.okh_forecast_prod', 'limit': 1000}
# Feel free to change the name of your suite here. Renaming this will not remove the other one.
expectation_suite_name = "okh_forecast_prod"
try:
    suite = context.get_expectation_suite(expectation_suite_name=expectation_suite_name)
    print(f'Loaded ExpectationSuite "{suite.expectation_suite_name}" containing {len(suite.expectations)} expectations.')
except DataContextError:
    suite = context.create_expectation_suite(expectation_suite_name=expectation_suite_name)
    print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')
validator = context.get_validator(
    batch_request=BatchRequest(**batch_request),
    expectation_suite_name=expectation_suite_name
)
column_names = [f'"{column_name}"' for column_name in validator.columns()]
print(f"Columns: {', '.join(column_names)}.")
validator.head(n_rows=5, fetch_all=False)
The function (the error occurs here):
validator.expect_compound_columns_to_be_unique(['column1', 'column2'])
Then I get the following error:
MetricResolutionError: Cannot compile Column object until its 'name' is assigned.
How can I solve this problem?
I am trying to backtest a strategy using backtrader. Most of the examples I have seen only use a CSV file. I was wondering if it is possible to just get data from an exchange, turn it into a pandas dataframe, and then use backtrader? When I run it I get the error AttributeError: 'numpy.int64' object has no attribute 'lower', which refers to pandafeed.py.
import ccxt, os
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
import backtrader

class Trader:
    def __init__(self) -> None:
        load_dotenv()
        self.connect()

    def connect(self):
        """ Creates Binance client """
        self.exchange = ccxt.binance({
            'apiKey': os.getenv('BINANCE_API_KEY'),
            'secret': os.getenv('BINANCE_API_SECRET')
        })
klines = Trader().exchange.fetch_ohlcv(symbol=trading_pair, timeframe=interval)
dataFrame = pd.DataFrame(klines)
dataFrame[0] = [datetime.fromtimestamp(t/1000) for t in dataFrame[0]]
data = backtrader.feeds.PandasData(dataname=dataFrame)
cerebro = backtrader.Cerebro()
cerebro.broker.set_cash(10000)
cerebro.adddata(data)
cerebro.run()
If I use column names and change my code to the below
columns = ['datetime', 'open', 'high', 'low', 'close', 'volume']
dataFrame = pd.DataFrame(klines, columns=columns)
dataFrame["datetime"] = [datetime.fromtimestamp(t/1000) for t in dataFrame["datetime"]]
data = backtrader.feeds.PandasData(dataname=dataFrame)
I get this error, again from pandafeed.py:
AttributeError: 'int' object has no attribute 'to_pydatetime'
My question is:
how do I turn a list into something I can use to run backtrader? Thank you.
P.S. An example data structure returned by klines looks like this:
[
[1621152000000, 49375.28, 49795.89, 48656.0, 49014.99, 10956.006583],
[1621166400000, 49014.99, 49249.06, 47566.01, 47727.26, 14166.961995],
[1621180800000, 47727.26, 48097.59, 44444.44, 45549.26, 36819.653456],
[1621195200000, 45553.24, 46480.0, 43825.39, 46431.5, 28724.055984],
[1621209600000, 46426.83, 46686.0, 42777.0, 42915.46, 28171.858447],
[1621224000000, 42915.46, 45400.0, 42196.97, 45149.18, 40557.45817],
[1621238400000, 45143.28, 45800.0, 44291.84, 45731.39, 23851.50751],
[1621252800000, 45733.55, 45791.04, 43156.0, 43362.75, 23137.989315],
[1621267200000, 43357.0, 44400.0, 42001.0, 44197.73, 30883.162039],
[1621281600000, 44197.73, 44939.2, 42500.0, 43538.04, 20055.197255],
[1621296000000, 43538.02, 45281.34, 43150.79, 44779.83, 19252.919453],
[1621310400000, 44774.78, 45799.29, 44738.26, 45172.7, 17218.430549],
[1621324800000, 45172.69, 45420.0, 44607.08, 45225.71, 8427.020047]
]
I think backtrader is trying to read headers where there are none. Try telling PandasData there are no headers; see the docs:
data = backtrader.feeds.PandasData(dataname=dataFrame, header=None)
I got the same issue, and after debugging it I found it happens because backtrader expects your dataframe to be indexed by the datetime (to put it simply).
Then to solve it, just add to your code:
...
dataFrame = dataFrame.set_index('datetime')
...
In my case I also had to change the type of the 'datetime' field, which I did with:
...
dataFrame["datetime"] = dataFrame["datetime"].values.astype(dtype='datetime64[ms]')
dataFrame = dataFrame.set_index('datetime')
...
I hope that will help you, even if you asked that question 2 months ago.
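Putting both fixes together, here is a minimal end-to-end sketch based on the question's code (klines is assumed to be the list of OHLCV lists shown above; trading_pair and interval are placeholders you would set yourself):
import backtrader
import pandas as pd

# Assumes klines was fetched as in the question, e.g.
# klines = Trader().exchange.fetch_ohlcv(symbol=trading_pair, timeframe=interval)
columns = ['datetime', 'open', 'high', 'low', 'close', 'volume']
dataFrame = pd.DataFrame(klines, columns=columns)
# Convert the epoch-millisecond timestamps and index the frame by them,
# since backtrader's PandasData reads the datetime from the index by default
dataFrame['datetime'] = pd.to_datetime(dataFrame['datetime'], unit='ms')
dataFrame = dataFrame.set_index('datetime')

data = backtrader.feeds.PandasData(dataname=dataFrame)
cerebro = backtrader.Cerebro()
cerebro.adddata(data)
cerebro.run()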
I am trying to test a function that takes a pandas dataframe row, uses it to make an FTP call that is saved to a CSV, opens that CSV file, formats it, and saves it as a pickle.
I want to test the following:
builtins.open is called once with (path_to_raw, 'wb')
to_pickle is called once with (LOCAL_PKL.format(row.name))
Patching builtins.open does not seem to work since it is called indirectly by to_pickle, so the tests fail as builtins.open is called twice.
Function to Test:
def download_file(self, row):
    path_from = row['source']
    path_to_raw = LOCAL_RAW.format(row.name)
    self.connection = FTP(self.url)
    self.connection.login(self.username, self.password)
    with open(path_to_raw, 'wb') as f:
        self.connection.retrbinary('RETR ' + path_from, f.write)
    self.connection.quit()
    data = pd.read_csv(path_to_raw)
    data.columns = ['a', 'b', 'c']
    data.to_pickle(LOCAL_PKL.format(row.name))
Unit Tests:
import pandas as pd
import unittest.mock as mock
from unittest.mock import patch, mock_open, MagicMock, call

import maintain

@patch('builtins.open', create=True)
@patch('maintain.pd.read_csv')
def test_download_path(self, mock_open, mock_pd_read_csv):
    mock_pd_read_csv.return_value = pd.DataFrame()

    @mock.create_autospec
    def mock_pd_to_pickle(self, path):
        pass

    with patch.object(pd.DataFrame, 'to_pickle', mock_pd_to_pickle):
        real = maintain.DataFTP()
        real.connection = MagicMock(name='connection')
        row = pd.Series(data=['a', 'b'], index=['c', 'd'])
        row.name = 'anything'
        print(mock_open.assert_called_once_with(maintain.LOCAL_RAW.format(row.name), 'wb'))
        print(mock_pd_to_pickle.assert_called_once_with(maintain.LOCAL_PKL.format(row.name)))
So... this is clearly wrong, but I'm not sure why.
This test produces this error:
AssertionError: Expected 'read_csv' to be called once. Called 0 times.
Does anyone have any suggestions or know how to solve this?
Thank you!
I finally got it working with this:
@patch('builtins.open', new_callable=mock_open)
@patch('maintain.pd.read_csv', return_value=pd.DataFrame())
@patch.object(pd.DataFrame, 'to_pickle')
def test_download_path(self, mock_to_pickle, mock_read_csv, mock_open):
    real = maintain.EODDataFTP()
    real.connection = mock.Mock(name='connection')
    row = pd.Series(data=['', 'nyse'], index=['source', 'exchange'])
    row.name = 'anything'
    real.download_file(row)

    mock_open.assert_called_once_with(maintain.LOCAL_RAW.format(row.name), 'wb')
    mock_read_csv.assert_called_once()
    mock_to_pickle.assert_called_once_with(maintain.LOCAL_PKL.format(row.name))
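Two details are worth noting about why this version passes: stacked @patch decorators are applied bottom-up, so the decorator nearest the function supplies the first injected mock argument (here @patch.object(pd.DataFrame, 'to_pickle') maps to mock_to_pickle), and unlike the original attempt this test actually calls real.download_file(row), which is why read_csv is no longer reported as "Called 0 times". A minimal sketch of the ordering rule (the patched targets are arbitrary examples, not part of the question):
import unittest
from unittest.mock import patch

class OrderDemo(unittest.TestCase):
    @patch('os.remove')   # outermost decorator -> injected last
    @patch('os.rename')   # innermost decorator -> injected first
    def test_order(self, mock_rename, mock_remove):
        # mock_rename corresponds to the decorator closest to the function
        self.assertIsNot(mock_rename, mock_remove)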
I'd like to add metadata to individual tests in a TestCase that I've written to use Python's unittest framework. The metadata (a string, really) needs to be carried through the testing process and output to an XML file.
Other than remaining with the test, the data isn't going to be used by unittest, nor by my test code. (I've got a program that will run afterwards, open the XML file, and go looking for the metadata/string.)
I've previously used NUnit, which lets you use a C# attribute to do this. Specifically, you can put this above a class:
[Property("SmartArrayAOD", -3)]
and then later find that in the XML output.
Is it possible to attach metadata to a test in Python's unittest?
Simple way for just dumping XML
If all you want to do is write stuff to an XML file after every unit test, just add a tearDown method to your test class (e.g. if you have class MyTest, give it a tearDown method, as below).
class MyTest(unittest.TestCase):
    def tearDown(self):
        dump_xml_however_you_do()

    def test_whatever(self):
        pass
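For concreteness, here is a minimal sketch of what dump_xml_however_you_do() might do, using the standard-library xml.etree.ElementTree (the output file name, element names, and metadata value are all made up for illustration):
import os
import unittest
import xml.etree.ElementTree as ET

RESULTS_FILE = 'test_metadata.xml'  # hypothetical output path

class MyTest(unittest.TestCase):
    def tearDown(self):
        # Load the existing results document, or start a fresh one
        if os.path.exists(RESULTS_FILE):
            tree = ET.parse(RESULTS_FILE)
            root = tree.getroot()
        else:
            root = ET.Element('tests')
            tree = ET.ElementTree(root)
        # One element per test, carrying the metadata string for later tooling
        test_el = ET.SubElement(root, 'test', name=self.id())
        test_el.set('SmartArrayAOD', '-3')  # metadata value borrowed from the question
        tree.write(RESULTS_FILE)

    def test_whatever(self):
        pass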
General method
If you want a general way to collect and track metadata from all your tests and return it at the end, try the following: create an astropy table in your test class's __init__() and add rows to it during tearDown(), then extract a reference to your initialized instances of the test class from unittest, like this:
Step 1: set up a re-usable subclass of unittest.TestCase so we don't have to duplicate the table handling
(put all the example code in the same file or copy the imports)
"""
Demonstration of adding and retrieving meta data from python unittest tests
"""
import sys
import warnings
import unittest
import copy
import time
import astropy
import astropy.table
if sys.version_info < (3, 0):
    from StringIO import StringIO
else:
    from io import StringIO

class DemoTest(unittest.TestCase):
    """
    Demonstrates setup of an astropy table in __init__, adding data to the table in tearDown
    """
    def __init__(self, *args, **kwargs):
        super(DemoTest, self).__init__(*args, **kwargs)
        # Storing results in a list made it convenient to aggregate them later
        self.results_tables = [astropy.table.Table(
            names=('Name', 'Result', 'Time', 'Notes'),
            dtype=('S50', 'S30', 'f8', 'S50'),
        )]
        self.results_tables[0]['Time'].unit = 'ms'
        self.results_tables[0]['Time'].format = '0.3e'
        self.test_timing_t0 = 0
        self.test_timing_t1 = 0

    def setUp(self):
        self.test_timing_t0 = time.time()

    def tearDown(self):
        test_name = '.'.join(self.id().split('.')[-2:])
        self.test_timing_t1 = time.time()
        dt = self.test_timing_t1 - self.test_timing_t0
        # Check for errors/failures in order to get state & description. https://stackoverflow.com/a/39606065/6605826
        if hasattr(self, '_outcome'):  # Python 3.4+
            result = self.defaultTestResult()  # these 2 methods have no side effects
            self._feedErrorsToResult(result, self._outcome.errors)
            problem = result.errors or result.failures
            state = not problem
            if result.errors:
                exc_note = result.errors[0][1].split('\n')[-2]
            elif result.failures:
                exc_note = result.failures[0][1].split('\n')[-2]
            else:
                exc_note = ''
        else:  # Python 3.2 - 3.3 or 3.0 - 3.1 and 2.7
            # result = getattr(self, '_outcomeForDoCleanups', self._resultForDoCleanups)  # DOESN'T WORK RELIABLY
            # This is probably only good for python 2.x, meaning python 3.0, 3.1, 3.2, 3.3 are not supported.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            state = exc_type is None
            exc_note = '' if exc_value is None else '{}: {}'.format(exc_type.__name__, exc_value)
        # Add a row to the results table
        self.results_tables[0].add_row()
        self.results_tables[0][-1]['Time'] = dt * 1000  # Convert to ms
        self.results_tables[0][-1]['Result'] = 'pass' if state else 'FAIL'
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=astropy.table.StringTruncateWarning)
            self.results_tables[0][-1]['Name'] = test_name
            self.results_tables[0][-1]['Notes'] = exc_note
Step 2: set up a test manager that extracts metadata
def manage_tests(tests):
    """
    Function for running tests and extracting meta data
    :param tests: list of classes sub-classed from DemoTest
    :return: (TextTestResult, Table, string)
        result returned by unittest
        astropy table
        string: formatted version of the table
    """
    table_sorting_columns = ['Result', 'Time']
    # Build test suite
    suite_list = []
    for test in tests:
        suite_list.append(unittest.TestLoader().loadTestsFromTestCase(test))
    combo_suite = unittest.TestSuite(suite_list)
    # Run tests
    results = [unittest.TextTestRunner(verbosity=1, stream=StringIO(), failfast=False).run(combo_suite)]
    # Catch test classes
    suite_tests = []
    for suite in suite_list:
        suite_tests += suite._tests
    # Collect results tables
    results_tables = []
    for suite_test in suite_tests:
        if getattr(suite_test, 'results_tables', [None])[0] is not None:
            results_tables += copy.copy(suite_test.results_tables)
    # Process tables, if any
    if len(results_tables):
        a = []
        while (len(a) == 0) and len(results_tables):
            a = results_tables.pop(0)  # Skip empty tables, if any
        results_table = a
        for rt in results_tables:
            if len(rt):
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', category=DeprecationWarning)
                    results_table = astropy.table.join(results_table, rt, join_type='outer')
        try:
            results_table = results_table.group_by(table_sorting_columns)
        except Exception:
            print('Error sorting test results table. Columns may not be in the preferred order.')
        column_names = list(results_table.columns.keys())
        alignments = ['<' if cn == 'Notes' else '>' for cn in column_names]
        if len(results_table):
            rtf = '\n'.join(results_table.pformat(align=alignments, max_width=-1))
            exp_res = sum([result.testsRun - len(result.skipped) for result in results])
            if len(results_table) != exp_res:
                print('ERROR forming results table. Expected {} results, but table length is {}.'.format(
                    exp_res, len(results_table),
                ))
        else:
            rtf = None
    else:
        results_table = rtf = None
    return results, results_table, rtf
Step 3: Example usage
class FunTest1(DemoTest):
    @staticmethod
    def test_pass_1():
        pass

    @staticmethod
    def test_fail_1():
        assert False, 'Meant to fail for demo 1'

class FunTest2(DemoTest):
    @staticmethod
    def test_pass_2():
        pass

    @staticmethod
    def test_fail_2():
        assert False, 'Meant to fail for demo 2'
res, tab, form = manage_tests([FunTest1, FunTest2])
print(form)
print('')
for r in res:
    print(r)
    for error in r.errors:
        print(error[0])
        print(error[1])
Sample results:
$ python unittest_metadata.py
Name Result Time Notes
ms
-------------------- ------ --------- ----------------------------------------
FunTest2.test_fail_2 FAIL 5.412e-02 AssertionError: Meant to fail for demo 2
FunTest1.test_fail_1 FAIL 1.118e-01 AssertionError: Meant to fail for demo 1
FunTest2.test_pass_2 pass 6.199e-03
FunTest1.test_pass_1 pass 6.914e-03
<unittest.runner.TextTestResult run=4 errors=0 failures=2>
This should work with python 2.7 or 3.7. You can add whatever columns you want to the table, and you can add parameters and other data to the table in setUp, tearDown, or even during the tests.
Warnings:
This solution accesses a protected attribute _tests of unittest.suite.TestSuite, which can have unexpected results. This specific implementation works as expected for me in python2.7 and python3.7, but slight variations on how the suite is built and interrogated can easily lead to strange things happening. I couldn't figure out a different way to extract references to the instances of my classes that unittest uses, though.
I am trying to write a class that will look for certain column types in a sqlalchemy reflected table and then do some operations for a subset of columns based on the data type.
I can correctly reflect the table and grab a list of the 'date' type columns as shown in the date_types list. However, when it gets to table[name] the function fails with the error:
*** TypeError: 'DeclarativeMeta' object is not subscriptable
If I use dot notation instead of square brackets, i.e. table.col_name, I can access the table column attribute, but I don't see how I would iterate over the list of attribute names using that syntax.
Here is my class:
from pdb import set_trace
from sqlalchemy import Date, DateTime, TIMESTAMP, func

class dateRangeProfiler():
    def __init__(self, session):
        self.date_ranges = {}
        self.date_types = [Date(), DateTime(), TIMESTAMP()]
        self.session = session
        print('date data types: ', str(self.date_types))

    def __call__(self, table):
        date_columns = self.getDateColumns(table)
        print(date_columns)
        date_column_profile = self.profileColumns(table, date_columns)
        return date_column_profile

    def getDateColumns(self, table):
        columns = [(c.name, c.type) for c in table.__table__.columns
                   if str(c.type) in [str(dt) for dt in self.date_types]]
        return columns

    def profileColumns(self, table, date_cols):
        profile = {}
        for (name, _) in date_cols:
            set_trace()
            print(name)
            qry = self.session.query(func.max(table[name]).label("max_date"),
                                     func.min(table[name]).label("min_date"),)  # <-- fails here
            res = qry.one()
            max = res.max_date
            min = res.min_date
            profile[name] = {'max': max, 'min': min}
        return profile
Here is how I call the profiler:
date_range_profiler = dateRangeProfiler(sess)
date_range_profiler(my_table)
And the error:
*** TypeError: 'DeclarativeMeta' object is not subscriptable
The issue doesn't have anything to do with the sqlalchemy module. When accessing attributes of an object using variable references, use the getattr() built-in Python function:
qry = self.session.query(func.max(getattr(table, name)).label("max_date"),
                         func.min(getattr(table, name)).label("min_date"),)
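For illustration, here is the same pattern outside the profiler; MyTable and created_at are hypothetical names standing in for a reflected declarative model and one of its date columns:
from sqlalchemy import func

name = 'created_at'               # hypothetical column name held in a variable
col = getattr(MyTable, name)      # equivalent to writing MyTable.created_at
qry = session.query(func.max(col).label('max_date'),
                    func.min(col).label('min_date'))
print(qry.one())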