I would like to pass a dataframe to @pytest.mark.parametrize. The dataframes are stored in conftest.py. The unit tests that reference the dataframes without using @pytest.mark.parametrize execute successfully.
However, when I apply @pytest.mark.parametrize, the code raises TypeError: 'function' object is not subscriptable
The dataframes are defined as fixture functions in conftest.py. For example:
@pytest.fixture(scope="module")
def df_vartypes():
    """Return a four-row DataFrame with string, integer, float and datetime columns.

    The fixture is module-scoped, so pytest builds it once per test module.
    """
    data = {
        "Name": ["tom", "nick", "krish", "jack"],
        "City": ["London", "Manchester", "Liverpool", "Bristol"],
        "Age": [20, 21, 19, 18],
        "Marks": [0.9, 0.8, 0.7, 0.6],
        # "min" is the minute alias; the one-letter "T" spelling is deprecated
        # in recent pandas releases.
        "dob": pd.date_range("2020-02-24", periods=4, freq="min"),
    }
    return pd.DataFrame(data)
The unit tests:
_cat_num_vars = [
(df_enc, "var_A", ["var_A"], []),
(df_enc_numeric, "var_B", [], ["var_B"]),
# TODO: Datetime test
(df_vartypes, None, ["Name", "City"], ["Age", "Marks"]),
(df_enc_numeric, None, [], ["var_A", "var_B", "target"])
]
#pytest.mark.parametrize(
"_df, _variables, _categorical_vars, _numerical_vars", _cat_num_vars
)
def test_find_categorical_and_numeric_vars_pass_diff_var_permutations(
_df, _variables, _categorical_vars, _numerical_vars
):
assert (_find_categorical_and_numerical_variables(
_df, _variables) == (_categorical_vars, _numerical_vars)
)
Traceback:
X = <function df_vartypes at 0x7fa8a1647310>, variables = None
def _find_categorical_and_numerical_variables(
X: pd.DataFrame, variables: Variables = None
) -> Tuple[List[Union[str, int]], List[Union[str, int]]]:
"""
Find numerical and categorical variables.
Parameters
----------
X : pandas DataFrame
variables : List of variables. Defaults to None.
Returns
-------
variables : Tuple with List of numerical and list of categorical variables.
"""
# If the user passes just 1 variable outside a list.
if isinstance(variables, (str, int)):
if is_categorical(X[variables]) or is_object(X[variables]):
variables_cat = [variables]
variables_num = []
elif is_numeric(X[variables]):
variables_num = [variables]
variables_cat = []
else:
raise TypeError("The variable entered is neither numerical "
"nor categorical.")
# If user leaves default None parameter.
elif variables is None:
# find categorical variables
if variables is None:
variables_cat = [
column
> for column in X.select_dtypes(include=["O", "category"]).columns
if _is_categorical_and_is_not_datetime(X[column])
]
E AttributeError: 'function' object has no attribute 'select_dtypes'
feature_engine/variable_manipulation.py:321: AttributeError
Related
I am working on something which should manage multiple water dispensers. I need to get some data from a json file and then load it into objects after that, append the objects to a list. For some reason list.append changes other object's parameters(more specific, location). Here is my code:
WaterDispenser.py
class WaterDispenser():
    """A water dispenser described by an id, a status flag and a two-element location."""

    # NOTE(review): the default for ``location`` is a list literal, which Python
    # evaluates once when the function is defined. Every instance created
    # without an explicit ``location`` stores that same list object, and
    # ``Load`` below writes into it in place.
    def __init__(self, id: int = -1, status: bool = False, location: list=[-1, -1]) -> None:
        self.id = id
        self.status = status
        self.location = location

    def Dump(self) -> dict:
        """Dump the properties into a JSON-compatible dictionary.

        Returns
        -------
        dict
            A dictionary mapping property names to their values; ``location``
            is emitted as a new two-element list.
        """
        return {"id": self.id, "status": self.status, "location":[self.location[0], self.location[1]]}

    def Load(self, object: dict) -> None:
        """Load the properties from a JSON dictionary.

        Parameters
        ----------
        object : dict, required
            The JSON record with the properties of the dispenser; the keys
            "id", "status" and "location" are read.

        Returns
        -------
        None
        """
        self.id = object["id"]
        self.status = object["status"]
        # Element-wise assignment mutates the existing list rather than
        # rebinding ``self.location`` to a new one.
        self.location[0] = object["location"][0]
        self.location[1] = object["location"][1]
        return None
main.py
import json
from WaterDispenser import WaterDispenser
dispensers = []
def LoadDispensers(path: str = "dispensers.json") -> int:
    """Load the JSON file into the global ``dispensers`` list.

    Parameters
    ----------
    path : str, optional
        The path of the file to be loaded. Defaults to "dispensers.json".

    Returns
    -------
    int
        Count of dispensers loaded.
    """
    global dispensers
    dispensers = []
    # The context manager closes the file even if parsing fails; the original
    # ``json.load(open(...))`` left the handle open.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    for d in data:
        x = WaterDispenser()
        x.Load(d)
        dispensers.append(x)
    return len(dispensers)
if __name__ == '__main__':
print(LoadDispensers())
print([o.Dump() for o in dispensers])
dispensers.json
[
{"id": 0, "status": true, "location": [0, 0]},
{"id": 1, "status": true, "location": [0, 1]},
{"id": 2, "status": false, "location": [1, 1]}
]
Output:
3
[{'id': 0, 'status': True, 'location': [1, 1]}, {'id': 1, 'status': True, 'location': [1, 1]}, {'id': 2, 'status': False, 'location': [1, 1]}]
The functional answer:
Change the init of WaterDispenser to
from typing import Optional
class WaterDispenser:
    """A water dispenser with an id, a status flag and a location list."""

    def __init__(self, id: int = -1, status: bool = False, location: Optional[list] = None) -> None:
        # A falsy location (None or an empty list) is replaced by a list
        # created for this call, so instances never share a default.
        if not location:
            location = [-1, -1]
        self.id = id
        self.status = status
        self.location = location
This should result in the expected response of
3
[{'id': 0, 'status': True, 'location': [0, 0]}, {'id': 1, 'status': True, 'location': [0, 1]}, {'id': 2, 'status': False, 'location': [1, 1]}]
The why:
Generally you want to avoid using mutable values as kwarg values because they're pre-computed (so your default location argument was technically the same object in memory across your WaterDispenser instances). append wasn't the culprit here and you can read more about this all via this SO discussion or read a succinct explanation via this answer to a similar question.
Design note:
It's worth noting that the way you are using Load in the above example could just be folded into WaterDispenser.__init__, so something like
from typing import Dict, Any
class WaterDispenser():
    """A water dispenser built directly from a JSON record."""

    # ``Dict`` takes a key type and a value type; the one-parameter form
    # ``Dict[Any]`` raises TypeError when the signature is evaluated.
    def __init__(self, data: Dict[str, Any]) -> None:
        """Read "id", "status" and "location" from ``data``.

        Missing keys fall back to -1, False and [-1, -1] respectively.
        """
        self.id = data.get("id", -1)
        self.status = data.get("status", False)
        self.location = data.get("location", [-1, -1])
or if you want to avoid typing
class WaterDispenser:
    """A water dispenser built directly from a JSON record."""

    def __init__(self, data: dict) -> None:
        # Each attribute is read from the record, with a placeholder used
        # when its key is missing.
        for field, fallback in (("id", -1), ("status", False), ("location", [-1, -1])):
            setattr(self, field, data.get(field, fallback))
That example still includes your default values but if you removed the secondary arguments from those get calls you could protect against missing data at runtime without having to check to see if you had, say, an impossible location data point like [-1, -1].
I have a JSON config and, based on user input, need to filter it so that only the specific sections remain. I tried running the code below, but it returns only part of the expected result.
Config:
superset_config = """
[ {
"Area":"Texas",
"Fruits": {
"RED": {
"Apple":["val1"],
"Grapes":["green"]
},
"YELLOW": {"key2":["val2"]}
}
},
{
"Area":"Dallas",
"Fruits": {
"GREEN": { "key3": ["val3"]}
}
}
]
"""
User Input:
inputs = ['Apple'] # input list
Code:
import json
derived_config = []
for each_src in json.loads(superset_config):
temp = {}
for src_keys in each_src:
if src_keys=='Fruits':
temp_inner ={}
for key,value in each_src[src_keys].items():
metrics = {key_inner:value_inner for key_inner,value_inner in value.items() if key_inner in inputs}
temp_inner[key]=metrics
temp[src_keys] = temp_inner
else:
temp[src_keys] = each_src[src_keys]
derived_config.append(temp)
what do I get from above code:
derived_config= [
{'Area': 'Texas',
'Fruits': {'RED': {'Apple': 'val1'},
'YELLOW': {}
}
},
{'Area': 'Dallas',
'Fruits': {'GREEN': {}
}
}
]
what is needed: I need below results
derived_config= [
{'Area': 'Texas',
'Fruits': {'RED': {'Apple': 'val1'}
}
}
]
can anyone please help? thanks.
Maybe something like this:
import json
inputs = ['Apple'] # input list
derived_config = []
for each_src in json.loads(superset_config):
    # Keep every colour group whose inner mapping has at least one of the
    # requested names as a key. A kept group is kept whole, including its
    # other entries.
    filtered_fruits = {k: v for k, v in (each_src.get('Fruits') or {}).items()
                       if any(input_ in v for input_ in inputs)}
    # Entries with no matching group are left out of the result.
    if filtered_fruits:
        # Overwrites 'Fruits' on the parsed entry itself; no copy is made.
        each_src['Fruits'] = filtered_fruits
        derived_config.append(each_src)
print(derived_config)
Edit: Based on the comments, it looks like you might want to filter the inner Fruits map based on the input list of fruits as well. In that case, we don't need to use the any function as above.
There is also a risk that we might unintentionally mutate the original source config. For example, if you save the result of json.loads(superset_config) to a variable and then try to filter multiple fruits from it, it will likely mutate the original config object. If you call json.loads directly each time, you don't need to worry about mutating the object; however, you need to be aware that, because list and dict are mutable types in Python, this can be a concern.
The solution below eliminates the possibility of mutating the original source object. But again, if you are calling json.loads each time anyway, you don't need to worry about this and you are free to modify the original config object.
import json
# Note: If you are using Python 3.9+, you can just use the standard collections
# for `dict` and `list`, as they now support parameterized values.
from typing import Dict, Any, List
# Inferred type of the 'Fruits' value in the superset config:
# a mapping of fruit color to a `FruitMap`.
Fruits = Dict[str, 'FruitMap']
FruitMap = Dict[str, Any]
# Inferred type of the whole superset config.
Config = List[Dict[str, Any]]


def get_fruits_config(src_config: Config, fruit_names: List[str]) -> Config:
    """
    Return the superset config entries that contain any requested fruit.

    Each returned entry is a shallow copy of the source entry whose 'Fruits'
    mapping holds only the color groups, and within them only the fruits,
    named in `fruit_names`. Entries without a match are omitted, and the
    source entries are not modified.
    """
    selected: Config = []
    for entry in src_config:
        by_color: Fruits = entry.get('Fruits') or {}
        kept: Fruits = {}
        for color, fruit_map in by_color.items():
            matches = {name: val for name, val in fruit_map.items()
                       if name in fruit_names}
            if matches:
                kept[color] = matches
        if not kept:
            continue
        # Shallow copy, so replacing 'Fruits' leaves the source entry intact.
        trimmed: Dict = entry.copy()
        trimmed['Fruits'] = kept
        selected.append(trimmed)
    return selected
Usage:
inputs = ['Apple'] # input list
config = json.loads(superset_config)
derived_config = get_fruits_config(config, inputs)
print(derived_config)
# prints:
# [{'Area': 'Texas', 'Fruits': {'RED': {'Apple': ['val1']}}}]
I am running some unit tests for a method using mock objects. The method sets attributes, but I can't seem to access them in the unit test: when I try, I get back a mock object, not the string I am trying to access.
Here is my unit test
@mock.patch("bpy.data.cameras.new")
def test_load_camera(self, mock_camera_data):
    """Check that _load_camera creates a camera and copies the type onto it."""
    loader = self.SceneLoader(self.json_data)
    self.mock_bpy.context.scene.objects.link.return_value = 5
    cam_data = {"name": "camera 1",
                "type": "PERSP",
                "lens_length": 50.0,
                "lens_unit": "MILLIMETERS",
                "translation": [
                    4.5,
                    74,
                    67
                ],
                "rotation": [
                    -0.008,
                    -0.002,
                    0.397,
                    0.918
                ]
                }
    data = mock.Mock()
    mock_camera_data.return_value = data
    loader._load_camera(cam_data)
    # ``called_with`` is not a Mock assertion: it is an auto-created attribute
    # that is always truthy, so ``assert mock.called_with(...)`` never fails.
    # ``assert_called_with`` actually checks the arguments of the last call.
    mock_camera_data.assert_called_with("Camera")
    assert data.type == "PERSP"
The method I am testing is
def _load_camera(self, cam_data):
camera_data = bpy.data.cameras.new("Camera")
camera_data.type = cam_data["type"]
When I run the unit test, I get this error
AssertionError: assert <Mock name='new().type' id='140691645666360'> == 'PERSP'
E + where <Mock name='new().type' id='140691645666360'> = <Mock name='new()' id='140691645594760'>.type```
Figured it out. I needed to do configure mock so the code now looks like
data = mock.Mock()
data.configure_mock(type=None)
mock_camera_data.return_value = data
loader._load_camera(cam_data)
You need to configure/set the attribute first in the mock so that you can access it later. Now the attribute "type" can be accessed after the method has been run
Because an attribute can appear under different names, I need to match the key of a key-value pair against a regex.
The possible names are defined in a dict:
MyAttr = [
('ref_nr', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
('color', 'Color|color|tinta|farbe|Farbe'),
]
The import attributes from an item in another dict:
ImportAttr = [
('Referenz', 'Ref-Val'),
('color', 'red'),
]
Now I would like to return the value of the import attributes, if it is a known attribute (defined in my first dict MyAttr) matching different spelling of the attribute in question.
for key, value in ImportAttr:
if key == "Referenz-Nr" : ref = value
if key == "Farbe" : color = value
The goal is to return the value of a possible attribute if it is a known one.
print(ref)
print(color)
Should return the value if "Referenz-Nr" and "Farbe" are known attributes.
Obviously this pseudo code does not work, I just can't get my head around a function implementing regex for a key search.
It was not entirely clear to me, but maybe this is what you want:
#!/usr/bin/python3
# Known attributes: (internal name, '|'-separated accepted spellings).
MyAttr = [
    ('ref_nr', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
    ('color', 'Color|color|tinta|farbe|Farbe')
]
# Imported attributes: (key as spelled in the import, value).
ImportAttr = [
    ('Referenz', 'Ref-Val'),
    ('color', 'red'),
]

# Split the spellings once instead of on every loop iteration.
_ref_spellings = MyAttr[0][1].split('|')
_color_spellings = MyAttr[1][1].split('|')

ref = None
color = None
for key, value in ImportAttr:
    if key in _ref_spellings:
        ref = value
    if key in _color_spellings:
        color = value

print("ref: ", ref)
print("color: ", color)
The split method breaks the string into a list of strings at the separator (the "|" character here); you can then check whether the key is in that list.
The following solution is a little bit tricky. If you don't want to hardcode the positions into your source you can use locals().
#!/usr/bin/python3
# Each entry pairs a target variable name with its '|'-separated spellings.
MyAttr = [
    ('ref', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
    ('color', 'Color|color|tinta|farbe|Farbe')
]
# Imported (key, value) pairs to be matched against the spellings above.
ImportAttr = [
    ('Referenz', 'Ref-Val'),
    ('color', 'red'),
]
ref, color = None, None
for var, names in MyAttr:
    for key, value in ImportAttr:
        if key in names.split('|'):
            # NOTE(review): at module level locals() is the module namespace,
            # so this rebinds the global named by `var`. Inside a function,
            # writes to locals() are not guaranteed to change local variables,
            # so confirm the scope before reusing this.
            locals()[var] = value
            # Stop at the first imported key that matches this attribute.
            break
print("ref: ", ref)
print("color: ", color)
If you want, you can also use pandas to solve this problem for the large data sets in this way.
get_references_and_colors.py
import pandas as pd
import re
import json
def get_references_and_colors(lookups, attrs):
    """Classify each (key, value) pair in ``attrs`` as a reference or a color.

    ``lookups[0][0]`` and ``lookups[1][0]`` are '|'-separated lists of the
    accepted reference and color spellings. One dict is returned per pair,
    in input order: it always carries "for_attr" (the key), plus "ref" or
    "color" set to the value when the key matches, or both set to None when
    it matches neither. The reference spellings are checked first.
    """
    ref_names = pd.Series(re.split(r"\|", lookups[0][0]))
    color_names = pd.Series(re.split(r"\|", lookups[1][0]))
    # Columns of unequal length are padded with NaN by the DataFrame
    # constructor; the padding is replaced by empty strings.
    table = pd.DataFrame({"ref": ref_names, "color": color_names}).fillna('')

    def classify(key, value):
        entry = {"for_attr": key}
        if (table["ref"] == key).any():
            entry["ref"] = value
        elif (table["color"] == key).any():
            entry["color"] = value
        else:
            # Unknown spelling: report both fields as not available.
            entry["color"] = None
            entry["ref"] = None
        return entry

    return [classify(key, value) for key, value in attrs]
if __name__ == "__main__":
LOOKUPS = [
('Reference|Referenz|Referenz-Nr|Referenznummer', 'a'),
('Color|color|tinta|farbe|Farbe', 'b'),
]
ATTR = [
('Referenz', 'Ref-Val'),
('color', 'red'),
('color2', 'orange'), # improper
('tinta', 'Tinta-col')
]
responses = get_references_and_colors(LOOKUPS, ATTR) # dictionary
pretty_response = json.dumps(responses, indent=4) # for pretty printing
print(pretty_response)
Output
[
{
"for_attr": "Referenz",
"ref": "Ref-Val"
},
{
"for_attr": "color",
"color": "red"
},
{
"for_attr": "color2",
"color": null,
"ref": null
},
{
"for_attr": "tinta",
"color": "Tinta-col"
}
]
Here is how I want my data to be : (key=name, value=[dob,[misc1, misc2,..]])
# my sample code
inputNames = [
('james', ['1990-01-19', ['james1', 'james2', 'james3'] ]),
('julie', ['1991-08-07', ['julie1', 'julie2'] ]),
('mikey', ['1989-01-23', ['mikey1'] ]),
('sarah', ['1988-02-05', ['sarah1', 'sarah2', 'sarah3', 'sarah4'] ])
]
class empData (list):
    """List of an employee's misc entries, with the date of birth kept as ``dob``."""

    def __init__ (self, misc=None):
        """Build from ``misc``, a ``[dob, [misc1, misc2, ...]]`` pair.

        With the default ``misc=None`` the instance is empty and ``dob`` is
        None; previously that default raised a TypeError on ``misc[0]``.
        """
        # Initialise this instance; ``list.__init__([])`` only initialised a
        # temporary list that was discarded immediately.
        super().__init__()
        if misc is None:
            self.dob = None
            return
        self.dob = misc[0]
        self.extend(misc[1])

    def edprint(self):
        """Return a ``(dob, entries)`` tuple, where entries is this list itself."""
        return (self.dob, self)
class myEmp:
    """An employee: a name plus an ``empData`` holding the details."""

    def __init__ (self, anm, amisc=None):
        # ``amisc`` is handed to empData unchanged.
        self.nm = anm
        self.details = empData(amisc)

    def printme(self):
        """Print the name followed by the ``edprint()`` tuple of the details."""
        summary = self.details.edprint()
        print(self.nm, summary)
emps={}
for i in inputNames:
m = myEmp(i[0],i[1])
emps[m] = m
print(emps)
# prints addresses of variables
# for actual data use the following lines
for ea in emps:
emps[ea].printme()
try:
with open('data.json','w') as wfd:
json.dump(emps, wfd)
except IOError as ioerr:
print('File error: ',str(ioerr))
wfd.close()
The above gives me an error: TypeError: key <__main__.myEmp object at 0x10143d588> is not a string
I am unable to figure out how to dump my dict of myEmp data structures as JSON
Before you can dump to json you need explicitly convert your data to a serializable type like dict or list. You could do this using a list comprehension:
>>> d = [{'key':ea.nm, 'value':[ea.details.dob, ea.details]} for ea in emps]
>>> json.dumps(d)
'[{"value": ["1991-08-07", ["julie1", "julie2"]], "key": "julie"}, {"value": ["1989-01-23", ["mikey1"]], "key": "mikey"}, {"value": ["1990-01-19", ["james1", "james2", "james3"]], "key": "james"}, {"value": ["1988-02-05", ["sarah1", "sarah2", "sarah3", "sarah4"]], "key": "sarah"}]'