Related
I am struggling to understand classes/objects and using a csv with them.
I have a CSV with 26 rows, 1 being the header, the other containing rows of info. Small example below
id,food,food_print,cal1,cal2,expi1999,expi2000,expi2001
1,bun,bun_bun,45.3434,199.32323,23.3333,45.4444,33.33333
2,burger,burger_bun,45.342343,200.34243,34.3333,0,9
3,pickle,pickle_seed,67.345454,34.3434,34,56,33
4,chicken,chicken_egg,44.34343,43.343343,43,434,34343
I have my class as follows:
class City(object):
    """One row of the food CSV (every column except ``id``).

    Every argument is optional: text columns default to ``'n/a'`` and numeric
    columns to the sentinel ``-999``. Values are stored exactly as passed in;
    no type conversion is performed.
    """

    def __init__(self, food='n/a', foodprint='n/a', cal1=-999, cal2=-999,
                 expi1999=-999, expi2000=-999, expi2001=-999):
        self.food = food
        self.foodprint = foodprint
        self.cal1 = cal1
        self.cal2 = cal2
        self.expi1999 = expi1999
        self.expi2000 = expi2000
        self.expi2001 = expi2001
# Read Food.csv: the first line is the header, every other line is a data row.
meals = []
with open('Food.csv', 'rt') as foodfile:
    # split(',') already returns a list, so it must not be split a second time.
    headers = foodfile.readline().strip().split(',')
    for line in foodfile:
        # Use the line the loop hands out. Calling readline() here as well
        # would consume a second line and silently skip every other row.
        foodfields = line.strip().split(',')
How do I write in the rows from my food csv into an object to be referenced in the class?
Assuming all columns are filled in every row:
# Build one City per data row (the header line has already been consumed).
for line in foodfile:
    # The loop variable already holds the row; an extra readline() would
    # skip every other line.
    foodfields = line.strip().split(',')
    # Column 0 is the id, which City does not take; pass the other seven.
    meals.append(City(*foodfields[1:8]))
It is good practice to avoid dynamic classes, because they make your code unpredictable under future changes. Use a dataclass instead. This also lets you use the "data model" as a type annotation when passing the data to any callable.
from dataclasses import dataclass
@dataclass
class City:
    """One row of the food CSV, with a default for every column.

    The decorator generates ``__init__``, ``__repr__`` and ``__eq__`` from the
    annotated fields below. The numeric columns hold decimal values in the
    sample data, so they are annotated as ``float``.
    """

    food: str = 'n/a'
    foodprint: str = 'n/a'
    cal1: float = -999
    cal2: float = -999
    expi1999: float = -999
    expi2000: float = -999
    expi2001: float = -999
But if you want to know how to do it: the built-in setattr function is available in the global namespace.
class DynamicContainer:
    """Plain object whose attributes are created at run time from CSV headers."""

    @classmethod
    def create(cls, headers: list[str], data: list[str]):
        """Return a new instance with one attribute per (header, value) pair.

        ``headers`` and ``data`` are paired positionally with ``zip``, so any
        surplus items in the longer list are ignored. Values are stored as
        given (no type conversion).
        """
        obj = cls()
        for header, value in zip(headers, data):
            setattr(obj, header, value)
        return obj
# Demo: build a container from one header line and one data line of the CSV,
# then read three of the dynamically created attributes back.
headers = 'id,food,food_print,cal1,cal2,expi1999,expi2000,expi2001'.split(',')
data = '1,bun,bun_bun,45.3434,199.32323,23.3333,45.4444,33.33333'.split(',')
cont = DynamicContainer.create(headers, data)
print(f"{cont.id} {cont.cal1} {cont.expi2000}")
Here is how I would do it:
import csv
class City(object):
    """One row of Food.csv with the numeric columns converted to ``float``.

    All arguments are optional. A numeric argument left at ``None`` stays
    ``None`` instead of being passed to ``float()``, so ``City()`` can be
    constructed without raising ``TypeError``.
    """

    def __init__(self, food=None, foodprint=None, cal1=None, cal2=None,
                 expi1999=None, expi2000=None, expi2001=None):
        self.food = food
        self.foodprint = foodprint
        self.cal1 = self._to_float(cal1)
        self.cal2 = self._to_float(cal2)
        self.expi1999 = self._to_float(expi1999)
        self.expi2000 = self._to_float(expi2000)
        self.expi2001 = self._to_float(expi2001)

    @staticmethod
    def _to_float(value):
        """Convert *value* to float, passing ``None`` through unchanged."""
        return None if value is None else float(value)

    def __repr__(self):
        # The label "food_print" follows the CSV column name, while the
        # attribute itself is called "foodprint".
        parts = [
            f"food={self.food!r}",
            f"food_print={self.foodprint!r}",
            f"cal1={self.cal1!r}",
            f"cal2={self.cal2!r}",
            f"expi1999={self.expi1999!r}",
            f"expi2000={self.expi2000!r}",
            f"expi2001={self.expi2001!r}",
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"
# newline="" lets the csv module do its own newline handling, as the csv
# documentation requires for files passed to csv.reader.
with open("Food.csv", newline="") as stream:
    reader = csv.reader(stream)
    next(reader)  # Skip the header row
    # Drop the leading id column; ignore blank lines, which csv yields as [].
    meals = [City(*row[1:]) for row in reader if row]
Here is what meals looks like:
[City(food='bun', food_print='bun_bun', cal1=45.3434, cal2=199.32323, expi1999=23.3333, expi2000=45.4444, expi2001=33.33333),
City(food='burger', food_print='burger_bun', cal1=45.342343, cal2=200.34243, expi1999=34.3333, expi2000=0.0, expi2001=9.0),
City(food='pickle', food_print='pickle_seed', cal1=67.345454, cal2=34.3434, expi1999=34.0, expi2000=56.0, expi2001=33.0),
City(food='chicken', food_print='chicken_egg', cal1=44.34343, cal2=43.343343, expi1999=43.0, expi2000=434.0, expi2001=34343.0)]
Notes
The __repr__ method is only there to show the contents of the class object; you can ignore it
I use the csv module to ease parsing the CSV data
I also converted the values into floating point numbers.
My goal is to run through all the *.py files in a directory and look at each call to a specific function test_func. This function has some optional parameters and I need to audit when the function is called with the optional parameters. My thought is to use the ast library (specifically ast.walk()).
I suppose this is a static analysis problem.
# function definition
def test_func(
    name: str,
    *,
    user: Optional['User'] = None,
    request: Optional[WebRequest] = None,
    **kwargs
) -> bool:
    """Stub of the function whose call sites are being audited.

    ``name`` is the only required argument; ``user`` and ``request`` are the
    optional keyword-only parameters. The body does nothing.
    """
    return None
# somewhere in another file ...
test_func('name0')
test_func('name1', request=request)
test_func('name1')
test_func('name2', user=user)
# figure out something like below:
# name0 is never given any optional parameters
# name1 is sometimes given request
# name2 is always given user
Here is a POC :
import typing
from typing import Optional
class User:
    """Empty stand-in type so the example below is self-contained."""
class WebRequest:
    """Empty stand-in type so the example below is self-contained."""
# function definition
def test_func(
    name: str,
    *,
    user: Optional['User'] = None,
    request: Optional[WebRequest] = None,
    **kwargs
) -> bool:
    """Stub of the function whose call sites are being audited.

    ``name`` is the only required argument; ``user`` and ``request`` are the
    optional keyword-only parameters. The body does nothing.
    """
    return None
# somewhere in another file ...
test_func('name0')
test_func('name1', request=WebRequest())
test_func('name1')
test_func('name2', user=User())
# figure out something like below:
# name0 is never given any optional parameters
# name1 is sometimes given request
# name2 is always given user
# Audit how test_func is called in this very file: for every distinct literal
# first argument, record the keyword-argument names passed by each call.
import ast
import collections

with open(__file__, "rt") as py_file:
    py_code = py_file.read()

each_call_kwargs_names_by_arg0_value: typing.Dict[str, typing.List[typing.Tuple[str, ...]]] = collections.defaultdict(list)

tree = ast.parse(py_code)
for node in ast.walk(tree):
    if not isinstance(node, ast.Call):
        continue
    func = node.func
    if isinstance(func, ast.Name):  # test_func(...)
        name = func.id
    elif isinstance(func, ast.Attribute):  # module.test_func(...)
        name = func.attr
    else:
        # e.g. f()() or table[key](): cannot be a direct test_func call, so
        # skip it instead of aborting the whole audit.
        continue
    if name != "test_func":
        continue
    first_arg = node.args[0] if node.args else None
    # Only a literal string can be grouped statically. ast.Constant replaces
    # the deprecated ast.Str node.
    if not (isinstance(first_arg, ast.Constant) and isinstance(first_arg.value, str)):
        continue
    arg0_value = first_arg.value
    each_call_kwargs_names_by_arg0_value[arg0_value].append(
        # keyword.arg is None for a **mapping argument
        tuple(keyword.arg for keyword in node.keywords)
    )

for arg0_value, each_call_kwargs_names in each_call_kwargs_names_by_arg0_value.items():
    if all(len(call_args) == 0 for call_args in each_call_kwargs_names):
        frequency = "NEVER"
    elif all(len(call_args) != 0 for call_args in each_call_kwargs_names):
        frequency = "ALWAYS"
    else:
        frequency = "SOMETIMES"
    print(f"{arg0_value!r} {frequency}: {each_call_kwargs_names}")
# Output :
# 'name0' NEVER: [()]
# 'name1' SOMETIMES: [('request',), ()]
# 'name2' ALWAYS: [('user',)]
You can use a recursive generator function to traverse an ast of your Python code:
import ast
def get_calls(d, f=('test_func',)):
    """Yield ``(first_arg, keyword_names)`` for every audited call below *d*.

    Args:
        d: An ``ast`` node (typically the result of ``ast.parse``). Anything
            that is not an AST node yields nothing.
        f: Names of the functions to look for. A call matches when it is
            written as a plain name (``test_func(...)``) or as the last
            attribute of a dotted expression (``module.test_func(...)``).

    Yields:
        A tuple of the first positional argument's literal value (``None``
        when the call has no positional argument or it is not a literal) and
        the list of keyword names (``None`` stands for a ``**mapping``).
        A matching call is reported before any calls nested inside it.
    """
    if not isinstance(d, ast.AST):
        return
    if isinstance(d, ast.Call):
        target = d.func
        # Attribute calls have no ``.id``; reading it blindly would raise.
        if isinstance(target, ast.Name):
            called = target.id
        elif isinstance(target, ast.Attribute):
            called = target.attr
        else:
            called = None
        if called in f:
            first = d.args[0] if d.args else None
            literal = first.value if isinstance(first, ast.Constant) else None
            yield literal, [kw.arg for kw in d.keywords]
    # Visits child nodes in field order, looking inside list-valued fields.
    for child in ast.iter_child_nodes(d):
        yield from get_calls(child, f=f)
Putting it all together:
import os, collections
# Group the keyword lists of every audited call by its first argument, across
# all *.py files in the current working directory.
d = collections.defaultdict(list)
for filename in os.listdir(os.getcwd()):
    if filename.endswith('.py'):
        # A separate name for the handle keeps the loop variable intact.
        with open(filename) as source:
            for a, b in get_calls(ast.parse(source.read())):
                d[a].append(b)

# 'never'     -> no call passed a keyword argument
# 'always'    -> every call passed at least one
# 'sometimes' -> a mix of both
# 'params' lists every keyword passed, not just the first one of each call.
r = {
    a: {
        'verdict': 'never' if not any(b) else 'always' if all(b) else 'sometimes',
        'params': [kw for call_kwargs in b for kw in call_kwargs],
    }
    for a, b in d.items()
}
Output:
{'name0': {'verdict': 'never', 'params': []},
'name1': {'verdict': 'sometimes', 'params': ['request']},
'name2': {'verdict': 'always', 'params': ['user']}}
After going through the forums, I did not find anything that solves this issue properly. I want to convert a file written in PHP to a Python dictionary. In this case, the file is a converted TrueType font file.
<?php
$type = 'TrueType';
$name = 'Calibri';
$desc = array('Ascent'=>750,'Descent'=>-250,'CapHeight'=>632,'Flags'=>32,'FontBBox'=>'[-503 -313 1240 1026]','ItalicAngle'=>0,'StemV'=>70,'MissingWidth'=>507);
$up = -113;
$ut = 65;
$cw = array(
chr(0)=>507,chr(1)=>507,chr(2)=>507,chr(3)=>507,chr(4)=>507,chr(5)=>507,chr(6)=>507,chr(7)=>507,chr(8)=>507,chr(9)=>507,chr(10)=>507,chr(11)=>507,chr(12)=>507,chr(13)=>507,chr(14)=>507,chr(15)=>507,chr(16)=>507,chr(17)=>507,chr(18)=>507,chr(19)=>507,chr(20)=>507,chr(21)=>507,
chr(22)=>507,chr(23)=>507,chr(24)=>507,chr(25)=>507,chr(26)=>507,chr(27)=>507,chr(28)=>507,chr(29)=>507,chr(30)=>507,chr(31)=>507,' '=>226,'!'=>326,'"'=>401,'#'=>498,'$'=>507,'%'=>715,'&'=>682,'\''=>221,'('=>303,')'=>303,'*'=>498,'+'=>498,
','=>250,'-'=>306,'.'=>252,'/'=>386,'0'=>507,'1'=>507,'2'=>507,'3'=>507,'4'=>507,'5'=>507,'6'=>507,'7'=>507,'8'=>507,'9'=>507,':'=>268,';'=>268,'<'=>498,'='=>498,'>'=>498,'?'=>463,'@'=>894,'A'=>579,
'B'=>544,'C'=>533,'D'=>615,'E'=>488,'F'=>459,'G'=>631,'H'=>623,'I'=>252,'J'=>319,'K'=>520,'L'=>420,'M'=>855,'N'=>646,'O'=>662,'P'=>517,'Q'=>673,'R'=>543,'S'=>459,'T'=>487,'U'=>642,'V'=>567,'W'=>890,
'X'=>519,'Y'=>487,'Z'=>468,'['=>307,'\\'=>386,']'=>307,'^'=>498,'_'=>498,'`'=>291,'a'=>479,'b'=>525,'c'=>423,'d'=>525,'e'=>498,'f'=>305,'g'=>471,'h'=>525,'i'=>229,'j'=>239,'k'=>455,'l'=>229,'m'=>799,
'n'=>525,'o'=>527,'p'=>525,'q'=>525,'r'=>349,'s'=>391,'t'=>335,'u'=>525,'v'=>452,'w'=>715,'x'=>433,'y'=>453,'z'=>395,'{'=>314,'|'=>460,'}'=>314,'~'=>498,chr(127)=>507,chr(128)=>507,chr(129)=>507,chr(130)=>250,chr(131)=>305,
chr(132)=>418,chr(133)=>690,chr(134)=>498,chr(135)=>498,chr(136)=>395,chr(137)=>1038,chr(138)=>459,chr(139)=>339,chr(140)=>867,chr(141)=>507,chr(142)=>468,chr(143)=>507,chr(144)=>507,chr(145)=>250,chr(146)=>250,chr(147)=>418,chr(148)=>418,chr(149)=>498,chr(150)=>498,chr(151)=>905,chr(152)=>450,chr(153)=>705,
chr(154)=>391,chr(155)=>339,chr(156)=>850,chr(157)=>507,chr(158)=>395,chr(159)=>487,chr(160)=>226,chr(161)=>326,chr(162)=>498,chr(163)=>507,chr(164)=>498,chr(165)=>507,chr(166)=>498,chr(167)=>498,chr(168)=>393,chr(169)=>834,chr(170)=>402,chr(171)=>512,chr(172)=>498,chr(173)=>306,chr(174)=>507,chr(175)=>394,
chr(176)=>339,chr(177)=>498,chr(178)=>336,chr(179)=>334,chr(180)=>292,chr(181)=>550,chr(182)=>586,chr(183)=>252,chr(184)=>307,chr(185)=>246,chr(186)=>422,chr(187)=>512,chr(188)=>636,chr(189)=>671,chr(190)=>675,chr(191)=>463,chr(192)=>579,chr(193)=>579,chr(194)=>579,chr(195)=>579,chr(196)=>579,chr(197)=>579,
chr(198)=>763,chr(199)=>533,chr(200)=>488,chr(201)=>488,chr(202)=>488,chr(203)=>488,chr(204)=>252,chr(205)=>252,chr(206)=>252,chr(207)=>252,chr(208)=>625,chr(209)=>646,chr(210)=>662,chr(211)=>662,chr(212)=>662,chr(213)=>662,chr(214)=>662,chr(215)=>498,chr(216)=>664,chr(217)=>642,chr(218)=>642,chr(219)=>642,
chr(220)=>642,chr(221)=>487,chr(222)=>517,chr(223)=>527,chr(224)=>479,chr(225)=>479,chr(226)=>479,chr(227)=>479,chr(228)=>479,chr(229)=>479,chr(230)=>773,chr(231)=>423,chr(232)=>498,chr(233)=>498,chr(234)=>498,chr(235)=>498,chr(236)=>229,chr(237)=>229,chr(238)=>229,chr(239)=>229,chr(240)=>525,chr(241)=>525,
chr(242)=>527,chr(243)=>527,chr(244)=>527,chr(245)=>527,chr(246)=>527,chr(247)=>498,chr(248)=>529,chr(249)=>525,chr(250)=>525,chr(251)=>525,chr(252)=>525,chr(253)=>453,chr(254)=>525,chr(255)=>453);
$enc = 'cp1252';
$uv = array(0=>array(0,128),128=>8364,130=>8218,131=>402,132=>8222,133=>8230,134=>array(8224,2),136=>710,137=>8240,138=>352,139=>8249,140=>338,142=>381,145=>array(8216,2),147=>array(8220,2),149=>8226,150=>array(8211,2),152=>732,153=>8482,154=>353,155=>8250,156=>339,158=>382,159=>376,160=>array(160,96));
$file = 'calibri.z';
$originalsize = 77252;
$subsetted = true;
?>
to:
font = {"type":"TrueType",
"name":"Calibri",
"desc":{"Ascent":750,etc...},
etc......
}
I thank all in advance!
P.S. I reuploaded this question (my previous was closed) to share my solution in case someone else needs it.
The solution I found was to write the parser myself:
import re
import regex
def parse_php(fontfile):
    """Parse a PHP font-definition file into a dictionary.

    Args:
        fontfile: An open text file object; it is handed to ``php_chunks``,
            which yields one ``$name = value;`` statement per item.

    Returns:
        A dict keyed by variable name (without the ``$``). Values that
        evaluate as Python expressions are stored evaluated, ``array(...)``
        values become a dict with string keys, and everything else is kept as
        the raw string.
    """
    font_dict = {}
    for item in php_chunks(fontfile):
        key, attr = item.split(" = ")
        name = key.replace("$", "").strip()
        attr = attr.replace("\t", "").strip()
        attr = re.sub(r"^(.*);", r"\1", attr)  # drop the terminating ';'
        # re.split("[,](?!'=>)",data["cw"])
        if re.match(r"'(.*)'", attr):
            attr = re.sub(r"'(.*)'", r"\1", attr)  # strip surrounding quotes
        try:
            # NOTE(security): eval() executes arbitrary Python expressions.
            # Only run this on font files you trust.
            font_dict[name] = eval(attr)
        except Exception:
            if "array" not in attr:
                # Bare words such as TrueType or cp1252 stay plain strings.
                font_dict[name] = attr
            elif re.match(r"^array\(", attr):
                attr_dict = {}
                body = re.sub(r"array\((.*)\)", r"\1", attr)
                # Split on the commas between entries, but not on a ','
                # key nor on commas inside a nested array(...). The
                # variable-width look-behind needs the `regex` module.
                rows = regex.split(r"(?<!array\(\d*)[,](?!'=>)", body)
                for row in rows:
                    dict_key, dict_item = row.strip().split("=>")
                    try:
                        attr_dict[str(eval(dict_key))] = eval(dict_item)
                    except Exception:
                        # e.g. a nested array(...): keep the raw text
                        attr_dict[str(eval(dict_key))] = dict_item
                font_dict[name] = attr_dict
    return font_dict
def php_chunks(raw):
    """Yield each ``$name = value;`` statement of a PHP file as one string.

    A statement starts on a line beginning with ``$`` and runs until the next
    such line; continuation lines are appended without a separator. Lines
    before the first statement (such as ``<?php``) and the closing ``?>`` tag
    are ignored, and the final statement is yielded as well.

    Args:
        raw: An open text file object (anything with ``read()``).
    """
    chunk = None
    for line in raw.read().splitlines():
        if line.startswith("$"):
            if chunk is not None:
                yield chunk
            chunk = line
        elif chunk is not None and line.strip() != "?>":
            chunk += line
    # Without this, the last statement of the file would be dropped.
    if chunk is not None:
        yield chunk
Full code is at the end.
I've written a program that reads in data from a csv file. It creates a class of variable called "Facility". Each facility can have multiple water sources, so there is another class called "WaterSource" which appends a list of attributes for an individual water source to each Facility. If I call :
data['00312']
I get output:
Facility 00312 US Aggregates Inc IN
If I ask for data['00312'].records:
[ WaterSource 00312 WELL Willshire 80 683175 4511625,
WaterSource 00312 WELL Willshire 80 682550 4511750,
WaterSource 00312 INTAKE Willshire 1200 Unnamed Quarry 683225 4512075,
WaterSource 00312 INTAKE Willshire 1200 Unnamed Quarry 683225 4512050]
I need to create a report that iterates over every variable in the class and returns a list of Facilities that have multiple water sources. Thus the final output would be a list of [RegNo, Facility Name, No. of WaterSources] such as:
[Facility 00312 US Aggregates Inc 4]
The issue I'm having is understanding how to iterate over the Facilities to count the records of the water sources appended to each Facilities object. I think I could add a method into the class somewhere, but I can't quite figure out where. I'm a python beginner, so please forgive me if this isn't quite the right vocabulary. I'm not even sure where to start, so any suggestions you could offer would be helpful.
class Facilities:
    """A registered facility together with its attached water-source records."""

    def __init__(self, regno, name, mwu):
        # Facility attributes; water sources are attached via add_record().
        self.regno = regno
        self.name = name
        self.mwu = mwu
        self.records = []

    def add_record(self, record):
        """Append one record (e.g. a WaterSource) to this facility."""
        self.records.append(record)

    def __repr__(self):
        '''Makes a string representation'''
        return 'Facility {0} {1} {2}'.format(self.regno, self.name, self.mwu)


class WaterSource(Facilities):
    '''holds info about the water source'''

    # The base class is named Facilities; referring to a non-existent
    # ``Facility`` raised NameError as soon as the module was loaded.
    # Facilities.__init__ is not called because a water source has no
    # name/mwu; add_record() is inherited.
    def __init__(self, regno, source, quad, cap, body, utmE, utmN):
        self.regno = regno
        self.source = source
        self.quad = quad
        self.cap = cap
        self.body = body
        self.utmE = utmE
        self.utmN = utmN
        self.records = []

    def source_data(self):
        """Return seven parallel lists, one per attribute, over self.records.

        The tuple order is (regnos, sources, quads, caps, bodies, utmEs,
        utmNs); every list is empty when no records are attached.
        """
        fields = ('regno', 'source', 'quad', 'cap', 'body', 'utmE', 'utmN')
        return tuple(
            [getattr(record, field) for record in self.records]
            for field in fields
        )

    def __repr__(self):
        return ' WaterSource {0} {1} {2} {3} {4} {5} {6}'.format(
            self.regno, self.source, self.quad, self.cap, self.body,
            self.utmE, self.utmN)
def read_data(filename):
    """Load facilities and their water sources from a CSV file.

    Args:
        filename: Path of a UTF-8 CSV file with the columns 'RegNo',
            'Facility', 'MWU Code', 'Source Code', 'Quadrangle',
            'Capacity (GPM)', 'Water Body Name', 'UTM East' and 'UTM North'.

    Returns:
        A dict mapping each RegNo to its Facilities object. Each row adds one
        WaterSource to the facility with the same RegNo.
    """
    rv = {}
    # The with-block closes the file; newline='' is what the csv module
    # expects for files it reads.
    with open(filename, 'r', encoding='UTF-8', newline='') as handle:
        for r in csv.DictReader(handle):
            regno = r['RegNo']
            if regno not in rv:
                rv[regno] = Facilities(regno, r['Facility'], r['MWU Code'])
            rv[regno].add_record(WaterSource(
                regno, r['Source Code'], r['Quadrangle'],
                r['Capacity (GPM)'], r['Water Body Name'],
                r['UTM East'], r['UTM North']))
    return rv
data = read_data('Fac-2013-2016.csv')
[Facility 00312 US Aggregates Inc 4]
The issue I'm having is understanding how to iterate over the
Facilities to count the records of the water sources appended to each
Facilities object.
From my understanding, simply add a method and return a count of the objects or straight up count the records using len unless there is something more to what you are asking for?
class Facilities:
    """A facility plus the records attached to it; repr includes their count."""

    def __init__(self, regno, name, mwu):
        # Facility attributes; records start out empty.
        self.regno, self.name, self.mwu = regno, name, mwu
        self.records = []

    def add_record(self, record):
        """Attach one record to this facility."""
        self.records += [record]

    def __repr__(self):
        """Return the facility summary followed by its number of records."""
        source_count = len(self.records)
        return f'Facility {self.regno} {self.name} {self.mwu} {source_count}'
All of your Facilities are stored as values in the dictionary data using the facility's RegNo for the keys. You can iterate over all the data using the dictionary items method. The length of each facility's records attribute is the number of water sources. You can build a format string to use the information you need.
# One report line per facility: registration number, name, number of sources.
for reg_no, facility in data.items():
    no_of_sources = len(facility.records)
    print('Facility', facility.regno, facility.name, no_of_sources)
#print('Facility {} {} {}'.format(facility.regno, facility.name, no_of_sources)) #Python versions <3.6
We wrote a small wrapper to a twitter app and published this information to http://pypi.python.org. But setup.py just contained a single field for specifying email / name of the author. How do I specify multiple contributors / email list, to the following fields since we would like this package to be listed under our names, much similar to how it shows up in http://rubygems.org.
author='foo',
author_email='foo.bar@gmail.com',
As far as I know, setuptools doesn't support using a list of strings in order to specify multiple authors. Your best bet is to list the authors in a single string:
author='Foo Bar, Spam Eggs',
author_email='foobar@baz.com, spameggs@joe.org',
I'm not sure if PyPI validates the author_email field, so you may run into trouble with that one. In any case, I would recommend you limit these to a single author and mention all contributors in the documentation or description.
Some sources:
This has been registered as a bug, actually, but it seems like support for multiple authors was not implemented. Here is an alternative solution. Here is an idea for how to provide a contact email for a project with multiple authors.
I'm sort of just piggybacking off of @modocache's answer, in case you want some specifics.
Throughout this answer, I'll be referring to a python3.6 version of the FOO-PYTHON-ENV\Lib\distutils\dist.py file
To reiterate, you cannot use a list in the author field. Here's why:
Spoiler: Two methods belonging to the DistributionMetadata class are the reason --
def _read_field(name):
    # Single-valued header lookup on msg; the placeholder string 'UNKNOWN'
    # is reported as None.
    value = msg[name]
    return None if value == 'UNKNOWN' else value
def _read_list(name):
    # Multi-valued header lookup on msg; an empty list is reported as None.
    values = msg.get_all(name, None)
    return None if values == [] else values
Here's where you'll hit an error if you try to stick a list in the author field:
class DistributionMetadata:
#*...(R E D A C T E D)...*#
def read_pkg_file(self, file):
"""Reads the metadata values from a file object."""
#*...(R E D A C T E D)...*#
# ####################################
# Note the usage of _read_field() here
# ####################################
self.name = _read_field('name')
self.version = _read_field('version')
self.description = _read_field('summary')
# we are filling author only.
self.author = _read_field('author')
self.maintainer = None
self.author_email = _read_field('author-email')
self.maintainer_email = None
self.url = _read_field('home-page')
self.license = _read_field('license')
#*...(R E D A C T E D)...*#
# ###################################
# Note the usage of _read_list() here
# ###################################
self.platforms = _read_list('platform')
self.classifiers = _read_list('classifier')
#*...(R E D A C T E D)...*#
& Here's the whole thing:
class DistributionMetadata:
    """Dummy class to hold the distribution meta-data: name, version,
    author, and so forth.
    """

    _METHOD_BASENAMES = ("name", "version", "author", "author_email",
                         "maintainer", "maintainer_email", "url",
                         "license", "description", "long_description",
                         "keywords", "platforms", "fullname", "contact",
                         "contact_email", "classifiers", "download_url",
                         # PEP 314
                         "provides", "requires", "obsoletes",
                         )

    def __init__(self, path=None):
        """Start with empty metadata, or load it from the file at *path*."""
        if path is not None:
            # Close the file as soon as it has been parsed.
            with open(path) as metadata_file:
                self.read_pkg_file(metadata_file)
        else:
            self.name = None
            self.version = None
            self.author = None
            self.author_email = None
            self.maintainer = None
            self.maintainer_email = None
            self.url = None
            self.license = None
            self.description = None
            self.long_description = None
            self.keywords = None
            self.platforms = None
            self.classifiers = None
            self.download_url = None
            # PEP 314
            self.provides = None
            self.requires = None
            self.obsoletes = None

    def read_pkg_file(self, file):
        """Reads the metadata values from a file object."""
        msg = message_from_file(file)

        def _read_field(name):
            # Single-valued header; the placeholder 'UNKNOWN' means "not set".
            value = msg[name]
            if value == 'UNKNOWN':
                return None
            return value

        def _read_list(name):
            # Multi-valued header: every occurrence is collected in a list.
            values = msg.get_all(name, None)
            if values == []:
                return None
            return values

        metadata_version = msg['metadata-version']
        self.name = _read_field('name')
        self.version = _read_field('version')
        self.description = _read_field('summary')
        # we are filling author only.
        # Author is read as ONE string, so several authors have to share a
        # single comma-separated value.
        self.author = _read_field('author')
        self.maintainer = None
        self.author_email = _read_field('author-email')
        self.maintainer_email = None
        self.url = _read_field('home-page')
        self.license = _read_field('license')

        if 'download-url' in msg:
            self.download_url = _read_field('download-url')
        else:
            self.download_url = None

        self.long_description = _read_field('description')

        # Always define keywords, and tolerate an 'UNKNOWN' placeholder,
        # for which _read_field returns None.
        keywords = _read_field('keywords') if 'keywords' in msg else None
        self.keywords = keywords.split(',') if keywords is not None else None

        self.platforms = _read_list('platform')
        self.classifiers = _read_list('classifier')

        # PEP 314 - these fields only exist in 1.1
        if metadata_version == '1.1':
            self.requires = _read_list('requires')
            self.provides = _read_list('provides')
            self.obsoletes = _read_list('obsoletes')
        else:
            self.requires = None
            self.provides = None
            self.obsoletes = None
Consider using flit to build the package, as this build system supports multiple authors and maintainers. Store this metadata in pyproject.toml as follows:
[build-system]
requires = ["flit_core >=3.2,<4"]
build-backend = "flit_core.buildapi"
[project]
...
authors = [
{name = "First1 Last1", email = "name1@foo.bar"},
{name = "First2 Last2", email = "name2@foo.bar"},
]
maintainers = [
{name = "First1 Last1", email = "name1@foo.bar"},
{name = "First2 Last2", email = "name2@foo.bar"},
]