Upload CSV file in Anvil with Uplink - Python

I am getting this error:
TypeError: __init__() takes from 1 to 2 positional arguments but 3 were given at <ipython-input-4-66c1c8f89515>, line 8 called from Form1, line 18
This is my code in Anvil:

    class Form1(Form1Template):
        def __init__(self, **properties):
            # Set Form properties and Data Bindings.
            self.init_components(**properties)

        def file_loader_1_change(self, file, **event_args):
            """This method is called when a new file is loaded into this FileLoader"""
            anvil.server.call('import_csv_data', file)
and this is the code in my Jupyter notebook that uploads the data to an Anvil data table:

    import pandas as pd
    import anvil.tables as tables
    from anvil.tables import app_tables
    import anvil.media

    @anvil.server.callable
    def import_csv_data(file):
        with anvil.media.TempFile(file, "r") as f:
            df = pd.read_csv(f)
            for d in df.to_dict(orient="records"):
                # d is now a dict of {columnname -> value} for this row.
                # We use Python's **kwargs syntax to pass the whole dict as
                # keyword arguments.
                app_tables.NilaiTukar.add_row(**d)

I think the error you saw is because you are giving two arguments to anvil.media.TempFile, which is only designed to take one. I replicated your error with a simpler example:
    import anvil.media
    import anvil.server

    @anvil.server.callable
    def import_csv_data(file):
        with anvil.media.TempFile(file, "r") as f:
            pass

    if __name__ == "__main__":
        import_csv_data("fname.txt")
According to the docs you don't need the "r" argument. You should just call:

    @anvil.server.callable
    def import_csv_data(file):
        with anvil.media.TempFile(file) as f:
            ...

Then it should work for you.
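
For reference, here is a minimal sketch of the whole corrected Uplink script (the Uplink key is a placeholder you would replace with your own; the NilaiTukar table name is taken from your code):

    import anvil.server
    import anvil.media
    import pandas as pd
    from anvil.tables import app_tables

    anvil.server.connect("YOUR-UPLINK-KEY")  # placeholder: paste your app's Uplink key

    @anvil.server.callable
    def import_csv_data(file):
        # TempFile writes the Media object to disk and yields its file name
        with anvil.media.TempFile(file) as f:
            df = pd.read_csv(f)
        for d in df.to_dict(orient="records"):
            app_tables.NilaiTukar.add_row(**d)

    anvil.server.wait_forever()  # keep the process alive so the app can call us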

Python unittest to create a mock .json file

I have a function that looks like this:

    import datetime
    import json
    import os

    def file1_exists(directory):
        file1_path = os.path.join(directory, 'file1.json')
        return os.path.exists(file1_path)

    def file2_exists(directory):
        file2_path = os.path.join(directory, 'file2.log')
        return os.path.exists(file2_path)

    def create_file1(directory):
        if file1_exists(directory):
            return
        if not file2_exists(directory):
            return
        mod_time = os.stat(os.path.join(directory, 'file2.log')).st_mtime
        timestamp = {
            "creation_timestamp": datetime.datetime.fromtimestamp(mod_time).isoformat()
        }
        with open(os.path.join(directory, "file1.json"), "w") as f:
            json.dump(timestamp, f)
And I need to create unit tests that use mock files.
The three unit tests that I need are:
1. A mock myfile.json file where I will assert that the function returns None (based on the first if statement, since the file exists).
2. A way to mock-hide the data.txt item in order to assert that the function returns None (based on the second if statement).
3. A mock myfile.json file where I write the required data and then assert that the return value matches the expected outcome.
So far I've tried tests 1 and 2 with variations of this, but I've been unsuccessful:

    class TestAdminJsonCreation(unittest.TestCase):
        @patch('os.path.exists', return_value=True)
        def test_existing_admin_json(self, exists_mock):
            self.assertIsNone(postprocess_results.create_json_file())
I've also read about other solutions such as:
Python testing: using a fake file with mock & io.StringIO
But I haven't found a way to successfully do what I need...
You want to be able to provide different return values for each call to os.path.exists. Since you know the order of the calls, you can use side_effect to supply a list of values to be used in order.
    class TestAdminJsonCreation(unittest.TestCase):

        # JSON file already exists
        @patch('os.path.exists', return_value=True)
        def test_existing_admin_json(self, exists_mock):
            self.assertIsNone(postprocess_results.create_json_file())

        # no JSON file, no log file
        @patch('os.path.exists', side_effect=[False, False])
        def test_missing_log_file(self, exists_mock):
            self.assertIsNone(postprocess_results.create_json_file())

        # no JSON file, log file present
        @patch('os.path.exists', side_effect=[False, True])
        def test_write_data(self, exists_mock):
            ...
The third test requires an actual file system, or for you to mock open.
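
For the "actual file system" route, here is a minimal sketch using tempfile, assuming create_file1 can be imported from your module:

    import json
    import os
    import tempfile
    import unittest

    from mymodule import create_file1  # hypothetical module name; adjust to yours

    class TestCreateFile1RealFs(unittest.TestCase):
        def test_write_data(self):
            with tempfile.TemporaryDirectory() as d:
                # create the log file that create_file1 requires
                with open(os.path.join(d, 'file2.log'), 'w') as f:
                    f.write('log contents')
                create_file1(d)
                with open(os.path.join(d, 'file1.json')) as f:
                    written = json.load(f)
                self.assertIn('creation_timestamp', written)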
So, I ended up breaking my original function into three different functions for easier testing.
The tests work by checking what create_file1 does when we feed it different return values from the other two functions, and when we add valid data.
    import json
    import unittest
    from unittest.mock import mock_open, patch

    from mymodule import create_file1  # adjust 'mymodule' to where these names live

    class TestFile1JsonCreation(unittest.TestCase):
        @patch('builtins.open', new_callable=mock_open)
        @patch('os.stat')
        @patch('mymodule.file1_exists', return_value=True)
        @patch('mymodule.file2_exists', return_value=False)
        def test_existing_file1_json(self, file2_exists, file1_existsmock, stat, mopen):
            create_file1('.')
            # file1.json should not have been written
            mopen.assert_not_called()

        @patch('builtins.open', new_callable=mock_open)
        @patch('os.stat')
        @patch('mymodule.file1_exists', return_value=False)
        @patch('mymodule.file2_exists', return_value=False)
        def test_missing_file2(self, file2_exists, file1_existsmock, stat, mopen):
            create_file1('.')
            # file1.json should not have been written
            mopen.assert_not_called()

        @patch('builtins.open', new_callable=mock_open)
        @patch('os.stat')
        @patch('mymodule.file1_exists', return_value=False)
        @patch('mymodule.file2_exists', return_value=True)
        def test_write_data(self, file2_exists, file1_existsmock, stat, mopen):
            class FakeStat:
                st_mtime = 1641992788
            stat.return_value = FakeStat()

            create_file1('.')

            # file1.json should have been written
            mopen.assert_called_once_with('./file1.json', 'w')
            written_data = ''.join(
                c[1][0]
                for c in mopen().__enter__().write.mock_calls
            )
            expected_data = {"creation_timestamp": "2022-01-12T13:06:28"}
            written_dict_data = json.loads(written_data)
            self.assertEqual(written_dict_data, expected_data)

Dynamically call functions in Python

I am trying to test a Python library called Tulip dynamically. To do that I need to call the proper ti.<indicator_name> function and pass it the right arguments.
The problem is that each method has a fixed number of parameters, and I don't know how to pass them properly.
Let's say I want to test the ti.sma method, which requires two arguments, real and period:
    def sma(real, period):
        """
        Simple Moving Average
        """
        return lib.sma([real], [period])

So, I would need to call it as:

    sma_test = ti.sma(real=[25,20,22,30,22,28,25,30,21,23,24,22], period=5)
So my question is, how do I call the method above dynamically, using the JSON below as the payload?

    {
        "short_name": "sma",
        "data": [74.333511, 73.61084, 74.197395, 73.848442, 75.036385, 76.630219, 76.803459, 77.385063],
        "period": 5
    }
I have made this validator where I can get the function object, but what about the parameters?

    import pandas as pd
    import numpy as np
    import tulipy as ti
    import datetime
    import uuid
    import inspect

    def validate_indicator(payload):
        data = np.array(payload.get('data'))
        try:
            indicator = getattr(ti, payload.get('short_name'))
            validation_test = indicator(data)
        except TypeError as e:
            # indicator was called without its remaining required arguments
            return str(e)
If I run the code above I get a TypeError, obviously because I didn't pass the required argument period in validation_test = indicator(data). I believe the way to get there is to make a new function with optional *args:
    validate_indicator(
        {
            "short_name": "sma",
            "data": [74.333511, 73.61084, 74.197395, 73.848442, 75.036385, 76.630219, 76.803459, 77.385063],
            "period": 5
        }
    )

Result:

    "sma() missing 1 required positional argument: 'period'"
Another example: if I want to test ti.bbands, it requires real, period and stddev as arguments:

    def bbands(real, period, stddev):
        """
        Bollinger Bands
        """
        return lib.bbands([real], [period, stddev])
You actually can use **kwargs:

File test.py:

    import test2

    data = {
        "short_name": "sma",
        "data": [74.333511, 73.61084, 74.197395, 73.848442, 75.036385, 76.630219, 76.803459, 77.385063],
        "period": 5
    }

    f = getattr(test2, data.pop('short_name'))
    f(**data)

File test2.py:

    def sma(data, period):
        print(data)
        print(period)

Running it:

    > python3 test.py
    [74.333511, 73.61084, 74.197395, 73.848442, 75.036385, 76.630219, 76.803459, 77.385063]
    5
Note: if you want to use *args, you could call the function as:

    f(*[value for value in data.values()])
Edit

This would be a function that accepts the data dict as a parameter and calls the corresponding function for you:

    def validate_function(data):
        f = getattr(ti, data.pop('short_name'))
        return f(**data)
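
One wrinkle with the real tulipy wrappers: the first parameter of ti.sma is named real, not data, so f(**data) would raise an unexpected-keyword TypeError. Here is a sketch that uses inspect.signature (which you already import) to map the payload's "data" entry onto whatever the function's first parameter is called, assuming the wrappers are plain Python functions as quoted in the question:

    import inspect

    import numpy as np
    import tulipy as ti

    def validate_indicator(payload):
        payload = dict(payload)  # avoid mutating the caller's dict
        indicator = getattr(ti, payload.pop('short_name'))
        data = np.array(payload.pop('data'))
        # e.g. 'real' for ti.sma
        first_param = next(iter(inspect.signature(indicator).parameters))
        # remaining payload keys (period, stddev, ...) pass through as keyword arguments
        return indicator(**{first_param: data, **payload})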

Issue in mocking a Python unit test

I have written a test as below:

    class TestLoader(TestCase):

        @pytest.fixture(autouse=True)
        @patch('loaders.myloader.DSFactory')
        def _initialize_(self, mock_ds_factory):
            self.loader = MyLoader()
            mock_ds = Mock()
            mock_ds_factory.get_ds_for_env.return_value = mock_ds
            self.loader.ds = mock_ds

        def test_load(self):
            self.loader.ds.read_file.return_value = json.dumps(self.get_data())
            self.loader.load("test_s3_key")  # this line raises the error mentioned below

        @staticmethod
        def get_data():
            return {"key1": "value1", "key2": "value2"}
The associated source lives in loaders/myloader.py, which is as follows:

    from common.ds_factory import DSFactory

    class MyLoader:
        def __init__(self):
            self.ds = DSFactory.get_ds_for_env()

        def load(self, file_key):
            print(f"ds : {self.ds}")
            print(f"file read is : {self.ds.read_file(S3_BUCKET, file_key)}")
            data_dict = json.loads(self.ds.read_file(S3_BUCKET, file_key))
But when I run the test, I get this error:

    ds is : <MagicMock name='DSFactory.get_ds_for_env()' id='140634163567528'>
    file read is : <MagicMock name='DSFactory.get_ds_for_env().read_file()' id='140635257259568'>
    E   TypeError: the JSON object must be str, bytes or bytearray, not 'MagicMock'

I don't understand why, even after mocking the return value of read_file with

    self.loader.ds.read_file.return_value = json.dumps(self.get_data())

I am still getting a MagicMock object. I am stuck and don't have any clue how to resolve this.
Your code:

    from common.ds_factory import DSFactory

    class MyLoader:
        def __init__(self):
            self.ds = DSFactory.get_ds_for_env()

        def load(self, file_key):
            data_dict = json.loads(self.datastore.read_file(S3_BUCKET, file_key))

The issue I can see is that self.datastore does not exist; it should be self.ds.read_file. Please print self.datastore.read_file(S3_BUCKET, file_key) and verify the output.
Your error output shows that read_file is still returning a MagicMock rather than a JSON string, i.e. the mock you configured is not the one your loader is actually using.
To learn more about Mock and MagicMock, see: https://medium.com/ryans-dev-notes/python-mock-and-magicmock-b3295c2cc7eb
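
One way around this (a sketch, assuming loaders.myloader is importable from the test) is to keep the patch active for the whole test and configure read_file on the same mock the loader will actually receive:

    import json
    from unittest import TestCase
    from unittest.mock import Mock, patch

    from loaders.myloader import MyLoader

    class TestLoader(TestCase):
        @patch('loaders.myloader.DSFactory')
        def test_load(self, mock_ds_factory):
            mock_ds = Mock()
            mock_ds.read_file.return_value = json.dumps({"key1": "value1", "key2": "value2"})
            mock_ds_factory.get_ds_for_env.return_value = mock_ds

            loader = MyLoader()         # constructed while DSFactory is patched
            loader.load("test_s3_key")  # json.loads now receives a real string

            mock_ds.read_file.assert_called()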

How to mock and test Python open and pandas to_pickle

I am trying to test a function that takes a pandas DataFrame row, uses it to make an FTP call that is saved to CSV, opens that CSV file, formats it, and saves it as a pickle.
I want to test that:
1. builtins.open is called once with (path_to_raw, 'wb')
2. to_pickle is called once with (LOCAL_PKL.format(row.name))
Patching builtins.open does not seem to work, since it is also called indirectly by to_pickle, so the test fails because builtins.open is called twice.
Function to test:

    def download_file(self, row):
        path_from = row['source']
        path_to_raw = LOCAL_RAW.format(row.name)

        self.connection = FTP(self.url)
        self.connection.login(self.username, self.password)
        with open(path_to_raw, 'wb') as f:
            self.connection.retrbinary('RETR ' + path_from, f.write)
        self.connection.quit()

        data = pd.read_csv(path_to_raw)
        data.columns = ['a', 'b', 'c']
        data.to_pickle(LOCAL_PKL.format(row.name))
Unit tests:

    import pandas as pd
    import unittest.mock as mock
    from unittest.mock import patch, mock_open, MagicMock, call

    import maintain

    @patch('builtins.open', create=True)
    @patch('maintain.pd.read_csv')
    def test_download_path(self, mock_open, mock_pd_read_csv):
        mock_pd_read_csv.return_value = pd.DataFrame()

        @mock.create_autospec
        def mock_pd_to_pickle(self, path):
            pass

        with patch.object(pd.DataFrame, 'to_pickle', mock_pd_to_pickle):
            real = maintain.DataFTP()
            real.connection = MagicMock(name='connection')
            row = pd.Series(data=['a', 'b'], index=['c', 'd'])
            row.name = 'anything'
            print(mock_open.assert_called_once_with(maintain.LOCAL_RAW.format(row.name), 'wb'))
            print(mock_pd_to_pickle.assert_called_once_with(maintain.LOCAL_PKL.format(row.name)))
So... this is clearly wrong, but I'm not sure why. The test produces this error:

    AssertionError: Expected 'read_csv' to be called once. Called 0 times.

Does anyone have any suggestions or know how to solve this?
Thank you!
I finally got it working with this (patch decorators are applied bottom-up, so the mocks arrive in the test's parameter list in that same order; and because to_pickle is patched on pd.DataFrame itself, pandas never makes its own internal open call, so open is seen exactly once):

    @patch('builtins.open', new_callable=mock_open)
    @patch('maintain.pd.read_csv', return_value=pd.DataFrame())
    @patch.object(pd.DataFrame, 'to_pickle')
    def test_download_path(self, mock_to_pickle, mock_read_csv, mock_open):
        real = maintain.EODDataFTP()
        real.connection = mock.Mock(name='connection')
        row = pd.Series(data=['', 'nyse'], index=['source', 'exchange'])
        row.name = 'anything'

        real.download_file(row)

        mock_open.assert_called_once_with(maintain.LOCAL_RAW.format(row.name), 'wb')
        mock_read_csv.assert_called_once()
        mock_to_pickle.assert_called_once_with(maintain.LOCAL_PKL.format(row.name))

mrjob with JSON data

A friend of mine and I are working on a rather large JSON file. We want to perform MapReduce on parts of this file as quickly as possible. Since it appears to be hard to feed a JSON file directly into an mrjob job, we tried writing the needed data to a text file (where each line is an array element extracted from the JSON). This intermediate step takes far too much time because of the disk write operations.
Below is an example of our mrjob test file.
    from mrjob.job import MRJob
    import json

    class ReduceData(MRJob):

        def mapper(self, _, line):
            lineJSON = json.loads(line)
            yield lineJSON[2], 1

        def reducer(self, key, values):
            yield key, sum(values)

    if __name__ == '__main__':
        ReduceData.run()
The code above is run as follows:

    $ python reducedata.py data.txt

read_json.py is illustrated below:
    import ijson

    f = open('testData.json')
    parser = ijson.parse(f)

    if __name__ == '__main__':
        item = []
        for prefix, event, value in parser:
            if (prefix, event) == ('data.item', 'start_array'):
                item = []
            elif prefix == 'data.item.item' and value is not None:
                item.append(value)
            elif (prefix, event) == ('data.item', 'end_array'):
                # yield item as output, or something?
                item = []
With the above in mind, I have two questions:
1) Is there a way to provide the output from read_json.py as input to reducedata.py without performing write-to-disk operations?
2) If 1) is possible, how do I specify the output? mrjob expects a file and invokes the mapper line by line; each yield (see the bottom comment) in read_json.py is supposed to be a "line".
Thanks in advance!
-Superdids
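
One possible approach, sketched under the assumption that the data.item structure above is right: have read_json.py write each item to standard output as one JSON document per line, and pipe that straight into the mrjob script, which reads from stdin when you pass - as the input file:

    # read_json.py -- stream array items out as JSON lines, no temp file
    import json
    import sys

    import ijson

    if __name__ == '__main__':
        with open('testData.json') as f:
            item = []
            for prefix, event, value in ijson.parse(f):
                if (prefix, event) == ('data.item', 'start_array'):
                    item = []
                elif prefix == 'data.item.item' and value is not None:
                    item.append(value)
                elif (prefix, event) == ('data.item', 'end_array'):
                    # one "line" per item; default=str handles ijson's Decimal numbers
                    sys.stdout.write(json.dumps(item, default=str) + '\n')

Then run the two scripts connected by a pipe, with no intermediate file on disk:

    $ python read_json.py | python reducedata.py -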
