Pythonic reading from config files - python

I have a python class which reads a config file using ConfigParser:
Config file:
[geography]
Xmin=6.6
Xmax=18.6
Ymin=36.6
YMax=47.1
Python code:
class Slicer:
    """Holds the geographic bounding box read from a config file."""

    def __init__(self, config_file_name):
        """Load the [geography] limits from *config_file_name*.

        Sets self.x_min, self.x_max, self.y_min, self.y_max as floats.
        """
        config = ConfigParser.ConfigParser()
        config.read(config_file_name)
        # Read the bounds from the file; option keys are the attribute
        # names without the underscore (ConfigParser lowercases options).
        for attr in ('x_min', 'x_max', 'y_min', 'y_max'):
            setattr(self, attr,
                    config.getfloat('geography', attr.replace('_', '')))
I feel that the last four lines are repetitive, and should somehow be compressed to one Pythonic line that would create a self.item variable for each item in the section.
Any ideas?
Adam
UPDATE:
Following your answers, I've modified my code to:
for item in config.items('geography'):
setattr(self, '_'+item[0], float(item[1]))
Now,
print self.__dict__
>>> {'_xmax': 18.600000000000001, '_ymax': 47.100000000000001,
'_ymin': 36.600000000000001, '_xmin': 6.5999999999999996}

I usually try to avoid external interactions in a constructor - makes it hard to test the code. Better pass a config parser instance or a fp-like object instead of a filename.

for line in ['x_min', 'x_max', 'y_min', 'y_max']:
setattr(self, line, config.getfloat('geography', line.replace('_', '')))

How about something like:
for key in ['xmin','xmax','ymin','ymax']:
self.__dict__[key] = config.getfloat('geography',key);
Note that the above will assign it to self.xmin instead of self.x_min... however, if you are fine with that naming, then this should work... otherwise, mapping between names would be more code than the original.

Related

Python unittest to create a mock .json file

I have function that looks like this:
def file1_exists(directory):
    """Return True when 'file1.json' is present in *directory*."""
    return os.path.exists(os.path.join(directory, 'file1.json'))
def file2_exists(directory):
    """Return True when 'file2.log' is present in *directory*.

    Bug fix: the original assigned the path to ``log_path`` but tested
    ``file2_path``, so every call raised NameError.
    """
    file2_path = os.path.join(directory, 'file2.log')
    return os.path.exists(file2_path)
def create_file1(directory):
    """Write file1.json recording file2.log's modification time.

    Does nothing when file1.json already exists or file2.log is missing.
    """
    if file1_exists(directory) or not file2_exists(directory):
        return
    log_path = os.path.join(directory, 'file2.log')
    created = datetime.datetime.fromtimestamp(os.stat(log_path).st_mtime)
    payload = {"creation_timestamp": created.isoformat()}
    json_path = os.path.join(directory, "file1.json")
    with open(json_path, "w") as f:
        json.dump(payload, f)
And I need to create a unittest that uses mock files.
The 3 Unittests that I need are:
A mock file1.json file where I will assert that the function will return None (based on the 1st if statement, since the file exists)
A way to mock-hide the file2.log item in order to assert that the function will return None (based on the second if statement)
A mock file1.json file where I write the required data and then assert that the return matches the expected outcome.
So far I've tried tests 1. and 2. with variations of this but I've been unsuccessful:
class TestAdminJsonCreation(unittest.TestCase):
    # NOTE(review): '#patch' is almost certainly a formatting artifact for the
    # '@patch(...)' decorator (the '@' was lost) — as pasted this line is an
    # inert comment and nothing gets mocked. TODO confirm against the source.
    #patch('os.path.exists', return_value=True)
    def test_existing_admin_json(self):
        # NOTE(review): unittest has no 'assertNone'; the method is
        # 'assertIsNone' — as written this raises AttributeError.
        self.assertNone(postprocess_results.create_json_file())
I've also read about other solutions such as:
Python testing: using a fake file with mock & io.StringIO
But I haven't found a way to successfully do what I need...
You want to be able to provide different return values for each call to os.path.exists. Since you know the order of the calls, you can use side_effect to supply a list of values to be used in order.
class TestAdminJsonCreation(unittest.TestCase):
    # NOTE(review): every '#patch' below is a garbled '@patch(...)' decorator
    # (the '@' was lost in formatting) — as pasted they are inert comments.
    # NOTE(review): the Mock keyword is 'side_effect' (singular); passing
    # 'side_effects=' only sets an unused attribute on the mock.
    # NOTE(review): all three methods share one name, so only the last
    # definition would survive on the class — rename them to run all three.
    # No JSON file
    #patch('os.path.exists', return_value=True)
    def test_existing_admin_json(self):
        self.assertNone(postprocess_results.create_json_file())

    # JSON file, log file
    #patch('os.path.exists', side_effects=[True, False])
    def test_existing_admin_json(self):
        self.assertNone(postprocess_results.create_json_file())

    # JSON file, no log file
    #patch('os.path.exists', side_effects=[True, True])
    def test_existing_admin_json(self):
        ...
The third test requires an actual file system, or for you to mock open.
So, I ended up breaking my original function into 3 different functions for easier testing.
The tests are performed by checking what the result of the 'def create_file1' would be when we feed it different return_values from the other 2 functions and when we add valid data.
class TestFile1JsonCreation(unittest.TestCase):
    # NOTE(review): throughout this class, '#patch' is a formatting artifact
    # for the '@patch(...)' decorator — restore the '@' to make the mocks
    # active. Decorators inject mocks bottom-up, matching the parameter
    # order of each test method below.
    # NOTE(review): bare targets like 'file1_exists' normally must be
    # module-qualified (e.g. 'mymodule.file1_exists'); patch() raises
    # TypeError otherwise — confirm against the original module layout.
    #patch('builtins.open', new_callable=mock_open())
    #patch('os.stat')
    #patch('file1_exists', return_value=True)
    #patch('file2_exists', return_value=False)
    def test_existing_file1_json(self, file2_exists, file1_existsmock, stat, mopen):
        # file1.json already exists -> create_file1 must bail out early.
        create_file1('.')
        # file1.json should not have been written
        mopen.assert_not_called()

    #patch('builtins.open', new_callable=mock_open())
    #patch('os.stat')
    #patch('file1_exists', return_value=False)
    #patch('file2_exists', return_value=False)
    def test_missing_file2(self, file2_exists, file1_existsmock, stat, mopen):
        # file2.log is missing -> nothing to timestamp, so no write either.
        create_file1('.')
        # file1.json should not have been written
        mopen.assert_not_called()

    #patch('builtins.open', new_callable=mock_open())
    #patch('os.stat')
    #patch('file1_exists', return_value=False)
    #patch('file2_exists', return_value=True)
    def test_write_data(self, file2_exists, file1_existsmock, stat, mopen):
        # Fake os.stat result with a fixed mtime so the timestamp is stable.
        class FakeStat:
            st_mtime = 1641992788
        stat.return_value = FakeStat()
        create_file1('.')
        # file1.json should have been written
        mopen.assert_called_once_with('./file1.json', 'w')
        # Reassemble everything json.dump wrote through the mocked file:
        # c[1][0] is the first positional argument of each write() call.
        written_data = ''.join(
            c[1][0]
            for c in mopen().__enter__().write.mock_calls
        )
        # NOTE(review): fromtimestamp() uses the local timezone, so this
        # expected string is timezone-dependent — confirm for CI machines.
        expected_data = {"creation_timestamp": "2022-01-12T13:06:28"}
        written_dict_data = json.loads(written_data)
        self.assertEqual(written_dict_data, expected_data)

Is there a way to pass a named argument?

Here's my code:
def update_tags_with_value(tags, many_to_many_class):
    # NOTE(review): relies on 'self' from an enclosing method scope — this is
    # a nested helper inside a form/method, not a free function; confirm.
    if tags:
        # Close the person's currently-open rows before recreating them.
        many_to_many_class.objects.filter(
            personne=self.instance,
            date_v_fin=None
        ).update(date_v_fin=django_datetime.now())
        for idx_tag_with_value in tags:
            pl = many_to_many_class.objects.create(
                personne=self.instance,
                # Hard-coded keyword: works only for models whose field is
                # literally named 'langue' — the limitation the question is about.
                langue=TagWithValue.objects.get(
                    pk=idx_tag_with_value
                )
            )
            # NOTE(review): objects.create() already saves; this extra save()
            # is redundant but harmless.
            pl.save()


update_tags_with_value(self.cleaned_data.get('known_languages'),
                       PersonneLangue)
update_tags_with_value(self.cleaned_data.get('types_permis'),
                       PersonneTypesPermis)
So I found out I can easily pass a class as a parameter. But the last problem is about the named argument. If you watch my code, I do a langue=TagWithValue..[blabla]. The problem is that it's a "named" parameter, and I'd like to be able to pass it like that:
update_tags_with_value(self.cleaned_data.get('known_languages'),
PersonneLangue, 'langue')
update_tags_with_value(self.cleaned_data.get('types_permis'),
PersonneTypesPermis, 'permis')
And then to call it somehow like that (it doesn't work yet):
def update_tags_with_value(tags, many_to_many_class, champ):
    # NOTE(review): relies on 'self' from an enclosing method scope.
    if tags:
        many_to_many_class.objects.filter(
            personne=self.instance,
            date_v_fin=None
        ).update(date_v_fin=django_datetime.now())
        for idx_tag_with_value in tags:
            pl = many_to_many_class.objects.create(
                personne=self.instance,
                # BUG (the one the question asks about): 'champ=' passes the
                # literal keyword 'champ', not the field name stored in the
                # champ variable — hence "'champ' is an invalid keyword
                # argument". Expand a dynamic keyword with **{champ: value}.
                champ=TagWithValue.objects.get(
                    pk=idx_tag_with_value
                )
            )
            pl.save()
For now I get this error:
'champ' is an invalid keyword argument for this function
To be more precise, I need to call many_to_many_class.objects.create() one time with known_languages=blabla and another time with types_permis=blabla which, in other words, should call once many_to_many_class.objects.create(known_languages=blabla) and many_to_many_class.objects.create(types_permis=blabla) and I would like to know if there's a way to precise only the name of the parameter, not blabla
How to solve this?
Seems like normal keyword unpacking would work . . .
# The dict key is the *value* of champ, so ** expands it into the
# dynamically-named keyword argument (e.g. langue=... or permis=...).
kwargs = {champ: TagWithValue.objects.get(
    pk=idx_tag_with_value)}
pl = many_to_many_class.objects.create(
    personne=self.instance,
    **kwargs)
Of course, now champ might not be the best name for the function parameter, but hopefully you get the idea.
Here's my working solution, I don't know if it's the best "pythonic" way but it works like a charm:
def update_tags_with_value(tags, many_to_many_class, champ):
    # Working version: 'champ' holds the target field name as a string; it is
    # used as a dict key and expanded with ** into a dynamic keyword argument.
    if tags:
        # Close the person's currently-open rows first.
        many_to_many_class.objects.filter(
            personne=self.instance,
            date_v_fin=None
        ).update(date_v_fin=django_datetime.now())
        for idx_tag_with_value in tags:
            args = {
                'personne': self.instance,
                champ: TagWithValue.objects.get(
                    pk=idx_tag_with_value
                )}
            pl = many_to_many_class.objects.create(**args)
            # NOTE(review): create() already saves; this save() is redundant.
            pl.save()


update_tags_with_value(self.cleaned_data.get('known_languages'),
                       PersonneLangue, 'langue')
update_tags_with_value(self.cleaned_data.get('types_permis'),
                       PersonneTypePermis, 'type_permis')
update_tags_with_value(self.cleaned_data.get('diplomes'),
                       PersonneDiplome, 'diplome')
update_tags_with_value(self.cleaned_data.get('centres_dinteret'),
                       PersonneCentreDInteret, 'centre_dinteret')
update_tags_with_value(self.cleaned_data.get('hobbies'),
                       PersonneHobby, 'hobby')

Creating loop for __main__

I am new to Python, and I want your advice on something.
I have a script that runs one input value at a time, and I want it to be able to run a whole list of such values without me typing the values one at a time. I have a hunch that a "for loop" is needed for the main method listed below. The value is "gene_name", so effectively, i want to feed in a list of "gene_names" that the script can run through nicely.
Hope I phrased the question correctly, thanks! The chunk in question seems to be
def get_probes_from_genes(gene_names)
import json
import urllib2
import os
import pandas as pd
api_url = "http://api.brain-map.org/api/v2/data/query.json"
def get_probes_from_genes(gene_names):
    """Query the Allen Brain Atlas API for probes matching *gene_names*.

    Accepts a single gene name or a list of names; returns a dict mapping
    probe id -> probe name.

    Raises:
        Exception: when no probe matches any of the requested genes.
    """
    if not isinstance(gene_names, list):
        gene_names = [gene_names]
    # Quote each name in case there are white spaces in gene names.
    gene_names = ["'%s'" % gene_name for gene_name in gene_names]
    # Build the RMA query piece by piece. Fix: the pasted version used '='
    # instead of '+=' on every line, keeping only the last fragment.
    api_query = "?criteria=model::Probe"
    api_query += ",rma::criteria,[probe_type$eq'DNA']"
    api_query += ",products[abbreviation$eq'HumanMA']"
    api_query += ",gene[acronym$eq%s]" % (','.join(gene_names))
    api_query += ",rma::options[only$eq'probes.id','name']"
    # Fix: the paste dropped the '+' between api_url and api_query.
    data = json.load(urllib2.urlopen(api_url + api_query))
    d = {probe['id']: probe['name'] for probe in data['msg']}
    if not d:
        # Fix: the original formatted with the leaked loop variable
        # 'gene_name' (last name only); report every requested gene instead.
        raise Exception("Could not find any probes for %s gene. Check "
                        "http://help.brain-map.org/download/attachments/2818165/HBA_ISH_GeneList.pdf?version=1&modificationDate=1348783035873 "
                        "for list of available genes." % ','.join(gene_names))
    return d
def get_expression_values_from_probe_ids(probe_ids):
    """Fetch expression data for *probe_ids* from the Allen Brain Atlas API.

    Returns a tuple (expression_values, well_ids, well_coordinates,
    donor_names), one entry per probe/sample as reported by the API.
    """
    if not isinstance(probe_ids, list):
        probe_ids = [probe_ids]
    # Quote each id (mirrors get_probes_from_genes).
    probe_ids = ["'%s'" % probe_id for probe_id in probe_ids]
    # Fix: the pasted query string contained a stray space after '?'.
    api_query = "?criteria=service::human_microarray_expression[probes$in%s]" % (','.join(probe_ids))
    # Fix: the paste dropped the '+' between api_url and api_query.
    data = json.load(urllib2.urlopen(api_url + api_query))
    expression_values = [[float(expression_value)
                          for expression_value in data["msg"]["probes"][i]["expression_level"]]
                         for i in range(len(probe_ids))]
    well_ids = [sample["sample"]["well"] for sample in data["msg"]["samples"]]
    donor_names = [sample["donor"]["name"] for sample in data["msg"]["samples"]]
    well_coordinates = [sample["sample"]["mri"] for sample in data["msg"]["samples"]]
    return expression_values, well_ids, well_coordinates, donor_names
def get_mni_coordinates_from_wells(well_ids):
    """Look up MNI coordinates for *well_ids* in the bundled CSV table."""
    # Resolve the data file relative to this module, not the CWD.
    package_directory = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(package_directory, "data", "corrected_mni_coordinates.csv"), header=0, index_col=0)
    # NOTE(review): DataFrame.ix is deprecated and removed in modern pandas —
    # use frame.loc[well_ids] when upgrading.
    return list(frame.ix[well_ids].itertuples(index=False))
if __name__ == '__main__':
    # Example run for a single hard-coded gene (Python 2 print syntax).
    probes_dict = get_probes_from_genes("SLC6A2")
    expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
    print get_mni_coordinates_from_wells(well_ids)
whoa, first things first. Python ain't Java, so do yourself a favor and use a nice """xxx\nyyy""" string, with triple quotes to multiline.
api_query = """?criteria=model::Probe"
,rma::criteria,[probe_type$eq'DNA']
...
"""
or something like that. you will get white spaces as typed, so you may need to adjust.
If, like suggested, you opt to loop on the call to your function through a file, you will need to either try/except your data-not-found exception or you will need to handle missing data without throwing an exception. I would opt for returning an empty result myself and letting the caller worry about what to do with it.
If you do opt for raise-ing an Exception, create your own, rather than using a generic exception. That way your code can catch your expected Exception first.
class MyNoDataFoundException(Exception):
    """Raised when no probe data exists for the requested gene(s)."""
    pass

#replace your current raise code with...
# NOTE(review): 'your message here' is a placeholder, not valid Python —
# substitute an actual message string when adopting this.
if not d:
    raise MyNoDataFoundException(your message here)
clarification about catching exceptions, using the accepted answer as a starting point:
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
#keep track of your input data
search_data = line.strip()
try:
probes_dict = get_probes_from_genes(search_data)
except MyNoDataFoundException, e:
#and do whatever you feel you need to do here...
print "bummer about search_data:%s:\nexception:%s" % (search_data, e)
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
You may want to create a file with Gene names, then read content of the file and call your function in the loop. Here is an example below
if __name__ == '__main__':
    # Read one gene name per line and run the whole pipeline for each
    # (Python 2 print syntax).
    with open(r"/tmp/genes.txt","r") as f:
        for line in f.readlines():
            probes_dict = get_probes_from_genes(line.strip())
            expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
            print get_mni_coordinates_from_wells(well_ids)

How to get property names from .py file which contains class definition

I get on input file which contains only one class definition (class is just constants container, contains keys for json, similar file is used on Java client to decode json) looks like:
class Constants(object):
    """Constants container: attribute name -> JSON key string."""
    VERSION= 'version'
    OS = 'os'
    PROGRAM = 'program'
    # more constants .....
How to get dictionary of all properties defined inside Constants, how to parse file to dictionary ?
I want to compress keys and generate new .py and .java files with same constants keys but shorter keys.
Import the module
I used imp.load_module instead of __import__ in the following code to import abitrary file path.
Find the class object.
Iterate the class attribute using vars:
import imp
path = '/path/to/file'
with open(path, 'U') as f:
    # Load the file as a throwaway module named 'temporary'
    # (imp.load_module accepts an arbitrary path, unlike __import__).
    # NOTE(review): 'imp' is deprecated; importlib is the modern route.
    mod = imp.load_module('temporary', f, path, ('.py', 'U', imp.PY_SOURCE))
# NOTE(review): __builtins__ may be a module or a dict depending on context;
# vars() handles the module case seen when running as a script — confirm.
builtins = vars(__builtins__)
# Take the first module attribute that is not a builtin — assumed to be the
# one class the file defines.
cls = next(value for name, value in vars(mod).items() if name not in builtins)
# Public attributes of Constants only (skips _/__ names).
const_dict = {name: value for name, value in vars(mod.Constants).items()
              if not name.startswith('_')}
print(const_dict)
# => {'OS': 'os', 'VERSION': 'version', 'PROGRAM': 'program'}
Tested in Python on 2.7.6, 3.3.2, 3.4.0b2.
>>> [elem for elem in dir(Constants) if not elem.startswith("_")]
['OS', 'PROGRAM', 'VERSION']
Expanding on answer number one:
# dir(yourClass) will get you all the methods and properties of yourClass and parents whether yourClass
# is a definition or an instance
elements = [elem for elem in dir(Constants) if not elem.startswith("_")]
# Using yourClass.__dict__.keys() will give you the same as dir if applied to a definition but only instance members
# if applied to an instance
elements = [elem for elem in Constants.__dict__.keys() if not elem.startswith("_")]
# You can get to the values of the properties with (Python 2 print syntax):
for el in elements:
    print Constants.__dict__[el]
# plus whatever you want to do to those elements
# Or if you're using the __dict__ way
Constants.__dict__.items()
Here is an example of using execfile and python 2.6 (I work on Debian Wheezy). A shorter version to build the dictionary for python version 2.7 and higher is given too. The constants.py file can define several classes, all of them will be parsed.
#!/usr/bin/env python
# Collect {class_name: {CONST_NAME: value}} for every class defined in
# constants.py. NOTE(review): execfile is Python 2 only (removed in Py3).
d = {}
const_d = {}
execfile("constants.py", d)
for k,cls in d.items():
    # Skip the builtins that execfile injects into the namespace.
    if k not in vars(__builtins__):
        if type(cls) is type:
            # Python version < 2.7
            attributes = {}
            for name, value in vars(cls).items():
                if not name.startswith('__'):
                    attributes[name] = value
            # Python version >= 2.7
            #attributes = {name: value for name, value in vars(cls).items() if not name.startswith('__')}
            const_d[cls.__name__] = attributes
            pass
        pass
    pass
print(const_d)

string comprehension in Python

I am working with images that have multiple layer which are described in their meta data that looks like this..
print layers
Cube1[visible:true, mode:Normal]{r:Cube1.R, g:Cube1.G, b:Cube1.B, a:Cube1.A}, Ground[visible:true, mode:Lighten, opacity:186]{r:Ground.R, g:Ground.G, b:Ground.B, a:Ground.A}, Cube3[visible:true, mode:Normal]{r:Cube3.R, g:Cube3.G, b:Cube3.B, a:Cube3.A}
I'm wondering if this formatting could be recognizable by Python as more then a string. Ideally I would like to call up the properties of any one for the layers. For example:
print layers[0].mode
"Normal"
On another post someone showed me how to get the names of each layer, which was very helpful, but now I'm looking to use the other info.
PS: if it helps I don't care about any of the info inside the {}
Thanks
print type(layers)
<type 'str'>
In case you don't want to deal with regex ...
# Quick-and-dirty parse of the layer metadata into nested dicts:
# {layer_name: {attr: value}}. The {...} channel info is ignored.
layers = "Cube1[visible:true, mode:Normal]{r:Cube1.R, g:Cube1.G, b:Cube1.B, a:Cube1.A}, Ground[visible:true, mode:Lighten, opacity:186]{r:Ground.R, g:Ground.G, b:Ground.B, a:Ground.A}, Cube3[visible:true, mode:Normal]{r:Cube3.R, g:Cube3.G, b:Cube3.B, a:Cube3.A}"
layer_dict = {}
# Split on '}' so each chunk ends right after a layer's {...} block.
parts = layers.split('}')
for part in parts:
    part = part.strip(', ')
    name_end = part.find('[')
    # Skip chunks without a 'Name[' prefix (e.g. the trailing empty piece).
    if name_end < 1:
        continue
    name = part[:name_end]
    attrs_end = part.find(']')
    # 'key:value' pairs between the square brackets.
    attrs = part[name_end+1:attrs_end].split(', ')
    layer_dict[name] = {}
    for attr in attrs:
        attr_parts = attr.split(':')
        layer_dict[name][attr_parts[0]] = attr_parts[1]
# Python 2 print statements:
print 'Cube1 ... mode:', layer_dict.get('Cube1').get('mode')
print 'Ground ... opacity:', layer_dict.get('Ground').get('opacity')
print 'Cube3', layer_dict.get('Cube3')
output ...
Cube1 ... mode: Normal
Ground ... opacity: 186
Cube3 {'visible': 'true', 'mode': 'Normal'}
Parsing (Pyparsing et al) is surely the correct and extensible way to go, but here's a fast-and-dirty object and constructors using regexes and comprehensions to parse properties and bolt them on with setattr(). All constructive criticisms welcome!
import re


class Layer(object):
    """One image layer parsed from the metadata string.

    Each 'Name[key:value, ...]' group becomes a Layer whose bracketed
    properties are bolted on as instance attributes via setattr()
    (all values are kept as strings; the {...} channel info is ignored).
    """

    # Fix: the pasted '#classmethod' was a garbled '@classmethod' decorator —
    # without it, Layer.make_list_from_string(s) would pass the string as
    # 'cls' and fail.
    @classmethod
    def make_list_from_string(cls, s):
        """Parse the full metadata string into a list of Layer objects."""
        all_layers_params = re.findall(r'(\w+)\[([^\]]+)\]', s)
        return [cls(lname, largs) for (lname, largs) in all_layers_params]

    def __init__(self, name, args):
        self.name = name
        # Attach every 'key:value' pair found inside the brackets.
        for (larg, lval) in re.findall(r'(\w+):(\w+)(?:,\w*)?', args):
            setattr(self, larg, lval)

    def __str__(self):
        # items() instead of the Python-2-only iteritems() so the class works
        # on both Python 2 and 3; output is unchanged.
        return self.name + '[' + ','.join('%s:%s' % (k, v) for k, v in self.__dict__.items() if k != 'name') + ']'

    def __repr__(self):
        return self.__str__()


t = 'Cube1[visible:true, mode:Normal]{r:Cube1.R, g:Cube1.G, b:Cube1.B, a:Cube1.A}, Ground[visible:true, mode:Lighten, opacity:186]{r:Ground.R, g:Ground.G, b:Ground.B, a:Ground.A}, Cube3[visible:true, mode:Normal]{r:Cube3.R, g:Cube3.G, b:Cube3.B, a:Cube3.A}'
layers = Layer.make_list_from_string(t)
I moved all the imperative code into __init__() or the classmethod Layers.make_list_from_string().
Currently it stores all args as string, it doesn't figure opacity is int/float, but that's just an extra try...except block.
Hey, it does the job you wanted. And as a bonus it throws in mutability:
print layers[0].mode
'Normal'
print layers[1].opacity
'186'
print layers[2]
Cube3[visible:true,mode:Normal]
layers[0].mode = 'Weird'
print layers[0].mode
'Weird'
"I'm wondering if this formatting could be recognizable by Python as more then a string."
Alternatively, I was thinking if you tweaked the format a little, eval()/exec() could be used, but that's yukkier, slower and a security risk.

Categories

Resources