Trying to avoid creating numerous variables in python - python

I am new to python and I have a lot of variables I will be using in this script. These variables are being used to grab data from each column in an uploaded file. I have added variables for each object type and I have about 12 more object types to add. Isn't there a better way I can do this? I have the file it's grabbing data from here:
Action Object Solution ID hostgroup_name alias
Add Host Group ISD-CR ISD-CR_database ISD-CR Database
Add Service ISD-CR ISD-CR_database
Update Service Group ISD-CR ISD-CR Database
Delete Service ISD-CR ISD-CR_database
Here is the script I have so far.
from pynag import Model
from pynag.Parsers import config
def addObject():
# Add hostgroup object
hg = Model.Hostgroup()
hg.set_filename('/etc/nagios/objects/solution1/{0}.cfg'.format(target_hostgroup_name))
# Adding all attributes to allow any to be added if needed
hg.hostgroup_name = target_hostgroup_name
hg.alias = target_alias
hg.members = target_members
hg.hostgroup_members = target_hostgroup_members
hg.notes = target_notes
hg.notes_url = target_notes_url
hg.action_url = target_action_url
# Save
hg.save()
print "hostgroup added"
# Add service object
s = Model.Service()
s.set_filename('/etc/nagios/objects/solution1/{0}.cfg'.format(target_hostgroup_name))
# Adding all attributes to allow any to be added if needed
s.host_name = target_host_name
s.hostgroup_name = target_hostgroup_name
s.service_description = target_service_description
s.display_name = target_display_name
s.servicegroups = target_servicegroups
s.is_volatile = target_is_volatile
s.check_command = target_check_command
s.initial_state = target_initial_state
s.max_check_attempts = target_max_check_attempts
s.check_interval = target_check_interval
s.retry_interval = target_retry_interval
s.active_checks_enabled = target_active_checks_enabled
s.passive_checks_enabled = target_passive_checks_enabled
s.check_period = target_check_period
s.obsess_over_service = target_obsess_over_service
s.check_freshness = target_check_freshness
s.freshness_threshold = target_freshness_threshold
s.event_handler = target_event_handler
s.event_handler_enabled = target_event_handler_enabled
s.low_flap_threshold = target_low_flap_threshold
s.high_flap_threshold = target_high_flap_threshold
s.flap_detection_enabled = target_flap_detection_enabled
s.flap_detection_options = target_flap_detection_options
s.process_perf_data = target_process_perf_data
s.retain_status_information = target_retain_status_information
s.retain_nonstatus_information = target_retain_nonstatus_information
s.notification_interval = target_notification_interval
s.first_notification_delay = target_first_notification_delay
s.notification_period = target_notification_period
s.notification_options = target_notification_options
s.notification_enabled = target_notifications_enabled
s.contacts = target_contacts
s.contact_groups = target_contact_groups
s.stalking_options = target_stalking_options
s.notes = target_notes
s.notes_url = target_notes_url
s.action_url = target_action_url
s.icon_image = target_icon_image
s.icon_image_alt = target_icon_image_alt
# Save
s.save()
print "service added"
# Add servicegroup object
sg = Model.Servicegroup()
sg.set_filename('/etc/nagios/objects/solution1/{0}.cfg'.format(target_hostgroup_name))
# Adding all attributes to allow any to be added if needed
sg.servicegroup_name = target_servicegroup_name
sg.alias = target_alias
sg.members = target_members
sg.servicegroup_members = target_servicegroup_members
sg.notes = target_notes
sg.notes_url = target_notes_url
sg.action_url = '/etc/nagios/objects/solution1/{0}.cfg'.format(target_hostgroup_name)
# Save
sg.save()
print "service group added"
try:
current_file = csv.reader(open(input_file, "rb"), delimiter='\t')
except:
logging.error('No such file or directory. Please try again')
else:
for line in current_file:
for row in current_file:
target_hostgroup_name = row[3]
target_alias = row[4]
target_members = row[5]
target_hostgroup_members = row[6]
target_notes = row[7]
target_notes_url = row[8]
target_action_url = row[9]
target_host_name = row[10]
target_service_description = row[11]
target_display_name = row[12]
target_servicegroups = row[13]
target_is_volatile = row[14]
target_check_command = row[15]
target_initial_state = row[16]
target_max_check_attempts = row[17]
target_check_interval = row[18]
target_retry_interval = row[19]
target_active_checks_enabled = row[20]
target_passive_checks_enabled = row[21]
target_check_period = row[22]
target_obsess_over_service = row[23]
target_check_freshness = row[24]
target_freshness_threshold = row[25]
target_event_handler = row[26]
target_event_handler_enabled = row[27]
target_low_flap_threshold = row[28]
target_high_flap_threshold = row[29]
target_flap_detection_enabled = row[30]
target_flap_detection_options = row[31]
target_process_perf_data = row[32]
target_retain_status_information = row[33]
target_retain_nonstatus_information = row[34]
target_notification_interval = row[35]
target_first_notification_delay = row[36]
target_notification_period = row[37]
target_notification_options = row[38]
target_notifications_enabled = row[39]
target_contacts = row[40]
target_contact_groups = row[41]
target_stalking_options = row[42]
target_icon_image = row[43]
target_icon_image_alt = row[44]
target_servicegroup_name = row[45]
target_servicegroup_members = row[46]

If the values are in the same order every time, you could consider populating a list that you then could loop over, instead of doing it one by one.
For the "target" portion of your script, you could nest another loop over range(3, 47) as well (range excludes its end value, so this covers indexes 3 through 46), and pass the index to your list instead of typing out every number from 3 to 46 by hand.

Why do you do this?
for line in current_file:
for row in current_file:
If the first row is a header row and you're skipping it on purpose, you can use a DictReader instead.
It doesn't look like you'll be able to do much to clean this up, but you could factor out each "section" into its own function:
def save_hostgroup(name, alias, members, hostgroup_members, notes, notes_url, action_url):
    """Build a pynag ``Hostgroup`` from the given values and save it.

    The object is written to ``/etc/nagios/objects/solution1/<name>.cfg``.
    Every argument is copied onto the attribute of the same meaning, so the
    function no longer depends on any ``target_*`` globals.
    """
    hg = Model.Hostgroup()
    hg.set_filename('/etc/nagios/objects/solution1/{0}.cfg'.format(name))
    # Adding all attributes to allow any to be added if needed
    hg.hostgroup_name = name
    hg.alias = alias
    hg.members = members
    hg.hostgroup_members = hostgroup_members
    hg.notes = notes
    hg.notes_url = notes_url
    hg.action_url = action_url
    hg.save()

Behind the scenes all the member names of an object are stored in a dict. You can access this dict with vars(obj) or obj.__dict__. You can then use the update method of the dict to add a set of names to your object.
eg.
class SomeClass:
def __str__(self):
return "SomeClass({})".format(
", ".join(
"{}={!r}".format(key, value)
for key, value in self.__dict__.items()
)
)
__repr__ = __str__
target_names = ['var_a', 'var_b', 'var_c']
target_values = [1, 2, 3]
target = dict(zip(target_names, target_values))
assert target == {'var_a': 1, 'var_b': 2, 'var_c': 3}
s = SomeClass()
vars(s).update(target)
assert hasattr(s, 'var_a')
assert s.var_a == 1
print(s) # prints SomeClass(var_c=3, var_a=1, var_b=2)

Related

Parse xml w/ xsd to CSV with Python?

I am trying to parse a very large XML file which I downloaded from OSHA's website and convert it into a CSV so I can use it in a SQLite database along with some other spreadsheets. I would just use an online converter, but the osha file is apparently too big for all of them.
I wrote a script in Python which looks like this:
import csv
import xml.etree.cElementTree as ET
tree = ET.parse('data.xml')
root = tree.getroot()
xml_data_to_csv =open('Out.csv', 'w')
list_head=[]
Csv_writer=csv.writer(xml_data_to_csv)
count=0
for element in root.findall('data'):
List_nodes =[]
if count== 0:
inspection_number = element.find('inspection_number').tag
list_head.append(inspection_number)
establishment_name = element.find('establishment_name').tag
list_head.append(establishment_name)
city = element.find('city')
list_head.append(city)
state = element.find('state')
list_head.append(state)
zip_code = element.find('zip_code')
list_head.append(zip_code)
sic_code = element.find('sic_code')
list_head.append(sic_code)
naics_code = element.find('naics_code')
list_head.append(naics_code)
sampling_number = element.find('sampling_number')
list_head.append(sampling_number)
office_id = element.find('office_id')
list_head.append(office_id)
date_sampled = element.find('date_sampled')
list_head.append(date_sampled)
date_reported = element.find('date_reported')
list_head.append(date_reported)
eight_hour_twa_calc = element.find('eight_hour_twa_calc')
list_head.append(eight_hour_twa_calc)
instrument_type = element.find('instrument_type')
list_head.append(instrument_type)
lab_number = element.find('lab_number')
list_head.append(lab_number)
field_number = element.find('field_number')
list_head.append(field_number)
sample_type = element.find('sample_type')
list_head.append(sample_type)
blank_used = element.find('blank_used')
list_head.append(blank_used)
time_sampled = element.find('time_sampled')
list_head.append(time_sampled)
air_volume_sampled = element.find('air_volume_sampled')
list_head.append(air_volume_sampled)
sample_weight = element.find('sample_weight')
list_head.append(sample_weight)
imis_substance_code = element.find('imis_substance_code')
list_head.append(imis_substance_code)
substance = element.find('substance')
list_head.append(substance)
sample_result = element.find('sample_result')
list_head.append(sample_result)
unit_of_measurement = element.find('unit_of_measurement')
list_head.append(unit_of_measurement)
qualifier = element.find('qualifier')
list_head.append(qualifier)
Csv_writer.writerow(list_head)
count = +1
inspection_number = element.find('inspection_number').text
List_nodes.append(inspection_number)
establishment_name = element.find('establishment_name').text
List_nodes.append(establishment_name)
city = element.find('city').text
List_nodes.append(city)
state = element.find('state').text
List_nodes.append(state)
zip_code = element.find('zip_code').text
List_nodes.append(zip_code)
sic_code = element.find('sic_code').text
List_nodes.append(sic_code)
naics_code = element.find('naics_code').text
List_nodes.append(naics_code)
sampling_number = element.find('sampling_number').text
List_nodes.append(sampling_number)
office_id = element.find('office_id').text
List_nodes.append(office_id)
date_sampled = element.find('date_sampled').text
List_nodes.append(date_sampled)
date_reported = element.find('date_reported').text
List_nodes.append(date_reported)
eight_hour_twa_calc = element.find('eight_hour_twa_calc').text
List_nodes.append(eight_hour_twa_calc)
instrument_type = element.find('instrument_type').text
List_nodes.append(instrument_type)
lab_number = element.find('lab_number').text
List_nodes.append(lab_number)
field_number = element.find('field_number').text
List_nodes.append(field_number)
sample_type = element.find('sample_type').text
List_nodes.append(sample_type)
blank_used = element.find('blank_used').text
List_nodes.append()
time_sampled = element.find('time_sampled').text
List_nodes.append(time_sampled)
air_volume_sampled = element.find('air_volume_sampled').text
List_nodes.append(air_volume_sampled)
sample_weight = element.find('sample_weight').text
List_nodes.append(sample_weight)
imis_substance_code = element.find('imis_substance_code').text
List_nodes.append(imis_substance_code)
substance = element.find('substance').text
List_nodes.append(substance)
sample_result = element.find('sample_result').text
List_nodes.append(sample_result)
unit_of_measurement = element.find('unit_of_measurement').text
List_nodes.append(unit_of_measurement)
qualifier= element.find('qualifier').text
List_nodes.append(qualifier)
Csv_writer.writerow(List_nodes)
xml_data_to_csv.close()
But when I run the code I get a CSV with nothing in it. I suspect this may have something to do with the XSD file associated with the XML, but I'm not totally sure.
Does anyone know what the issue is here?
The code below is a 'compact' version of your code.
It assumes that the XML structure looks like in the script variable xml. (Based on https://www.osha.gov/opengov/sample_data_2011.zip)
The main difference between this sample code and yours is that I define the fields that I want to collect once (see FIELDS) and I use this definition across the script.
import xml.etree.ElementTree as ET
FIELDS = ['lab_number', 'instrument_type'] # TODO add more fields
xml = '''<main xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="health_sample_data.xsd">
<DATA_RECORD>
<inspection_number>316180165</inspection_number>
<establishment_name>PROFESSIONAL ENGINEERING SERVICES, LLC.</establishment_name>
<city>EUFAULA</city>
<state>AL</state>
<zip_code>36027</zip_code>
<sic_code>1799</sic_code>
<naics_code>238990</naics_code>
<sampling_number>434866166</sampling_number>
<office_id>418600</office_id>
<date_sampled>2011-12-30</date_sampled>
<date_reported>2011-12-30</date_reported>
<eight_hour_twa_calc>N</eight_hour_twa_calc>
<instrument_type>TBD</instrument_type>
<lab_number>L13645</lab_number>
<field_number>S1</field_number>
<sample_type>B</sample_type>
<blank_used>N</blank_used>
<time_sampled></time_sampled>
<air_volume_sampled></air_volume_sampled>
<sample_weight></sample_weight>
<imis_substance_code>S777</imis_substance_code>
<substance>Soil</substance>
<sample_result>0</sample_result>
<unit_of_measurement>AAAAA</unit_of_measurement>
<qualifier></qualifier>
</DATA_RECORD>
<DATA_RECORD>
<inspection_number>315516757</inspection_number>
<establishment_name>MARGUERITE CONCRETE CO.</establishment_name>
<city>WORCESTER</city>
<state>MA</state>
<zip_code>1608</zip_code>
<sic_code>1771</sic_code>
<naics_code>238110</naics_code>
<sampling_number>423259902</sampling_number>
<office_id>112600</office_id>
<date_sampled>2011-12-30</date_sampled>
<date_reported>2011-12-30</date_reported>
<eight_hour_twa_calc>N</eight_hour_twa_calc>
<instrument_type>GRAV</instrument_type>
<lab_number>L13355</lab_number>
<field_number>9831B</field_number>
<sample_type>P</sample_type>
<blank_used>N</blank_used>
<time_sampled>184</time_sampled>
<air_volume_sampled>340.4</air_volume_sampled>
<sample_weight>.06</sample_weight>
<imis_substance_code>9135</imis_substance_code>
<substance>Particulates not otherwise regulated (Total Dust)</substance>
<sample_result>0.176</sample_result>
<unit_of_measurement>M</unit_of_measurement>
<qualifier></qualifier>
</DATA_RECORD></main>'''
import csv

root = ET.fromstring(xml)
records = root.findall('.//DATA_RECORD')
# newline='' lets the csv module control line endings itself.
with open('out.csv', 'w', newline='') as out:
    # csv.writer quotes values that contain commas (for example an
    # establishment name such as "..., LLC."), which a plain ','.join
    # would silently split into extra columns.
    writer = csv.writer(out, lineterminator='\n')
    writer.writerow(FIELDS)
    for record in records:
        # findtext() yields '' for empty elements (and, via default, for
        # missing ones), whereas .find(f).text would be None or raise.
        writer.writerow([record.findtext(f, default='') for f in FIELDS])
out.csv
lab_number,instrument_type
L13645,TBD
L13355,GRAV

Parsing Security Matrix Spreadsheet - NoneType is not Iterable

Trying to Nest no's and yes's with their respective applications and services.
That way when a request comes in for a specific zone to zone sequence, a check can be run against this logic to verify accepted requests.
I have tried calling Decision_List[Zone_Name][yes_no].update, and I tried .append when it was a list type and not a dict, but there is no update method?
Base_Sheet = range(5, sh.ncols)
Column_Rows = range(1, sh.nrows)
for colnum in Base_Sheet:
Zone_Name = sh.col_values(colnum)[0]
Zone_App_Header = {sh.col_values(4)[0]:{}}
Zone_Svc_Header = {sh.col_values(3)[0]:{}}
Zone_Proto_Header = {sh.col_values(2)[0]:{}}
Zone_DestPort_Header = {sh.col_values(1)[0]: {}}
Zone_SrcPort_Header = {sh.col_values(0)[0]: {}}
Decision_List = {Zone_Name:{}}
for rows in Column_Rows:
app_object = sh.col_values(4)[rows]
svc_object = sh.col_values(3)[rows]
proto_object = sh.col_values(3)[rows]
dst_object = sh.col_values(2)[rows]
src_object = sh.col_values(1)[rows]
yes_no = sh.col_values(colnum)[rows]
if yes_no not in Decision_List[Zone_Name]:
Decision_List[Zone_Name][yes_no] = [app_object]
else:
Decision_List[Zone_Name]=[yes_no].append(app_object)
I would like it present info as follows
Decision_List{Zone_Name:{yes:[ssh, ssl, soap], no:
[web-browsing,facebook]}}
I would still like to know why I couldn't call the append method on that specific yes_no key whose value was a list.
But in the mean time, i made a work around of sorts. I created a set as the key and gave the yes_no as the value. this will allow me to pair many no type values with the keys being a set of the application, port, service, etc.. and then i can search for yes values and create additional dicts out of them for logic.
Any better ideas out there i am all ears.
for rownum in range(0, sh.nrows):
#row_val is all the values in the row of cell.index[rownum] as determined by rownum
row_val = sh.row_values(rownum)
col_val = sh.col_values(rownum)
print rownum, col_val[0], col_val[1: CoR]
header.append({col_val[0]: col_val[1: CoR]})
print header[0]['Start Port']
dec_tree = {}
count = 1
Base_Sheet = range(5, sh.ncols)
Column_Rows = range(1, sh.nrows)
for colnum in Base_Sheet:
Zone_Name = sh.col_values(colnum)[0]
Zone_App_Header = {sh.col_values(4)[0]:{}}
Zone_Svc_Header = {sh.col_values(3)[0]:{}}
Zone_Proto_Header = {sh.col_values(2)[0]:{}}
Zone_DestPort_Header = {sh.col_values(1)[0]: {}}
Zone_SrcPort_Header = {sh.col_values(0)[0]: {}}
Decision_List = {Zone_Name:{}}
for rows in Column_Rows:
app_object = sh.col_values(4)[rows]
svc_object = sh.col_values(3)[rows]
proto_object = sh.col_values(3)[rows]
dst_object = sh.col_values(2)[rows]
src_object = sh.col_values(1)[rows]
yes_no = sh.col_values(colnum)[rows]
for rule_name in Decision_List.iterkeys():
Decision_List[Zone_Name][(app_object, svc_object, proto_object)]= yes_no
Thanks again.
I think still a better way is to use collections.defaultdict
In this manner it will ensure that i am able to append to the specific yes_no as i had originally intended.
# The five descriptor columns are the same for every zone column, so read
# them once instead of calling sh.col_values() again for every row.
src_column, dst_column, proto_column, svc_column, app_column = (
    sh.col_values(index) for index in range(5)
)
for colnum in Base_Sheet:
    zone_column = sh.col_values(colnum)
    Zone_Name = zone_column[0]
    Zone_App_Header = {app_column[0]: {}}
    Zone_Svc_Header = {svc_column[0]: {}}
    Zone_Proto_Header = {proto_column[0]: {}}
    Zone_DestPort_Header = {dst_column[0]: {}}
    Zone_SrcPort_Header = {src_column[0]: {}}
    # NOTE(review): Decision_List is rebuilt for every column, so after the
    # loop it only holds the last zone - confirm that this is intended.
    Decision_List = {Zone_Name: defaultdict(list)}
    for rows in Column_Rows:
        app_object = app_column[rows]
        svc_object = svc_column[rows]
        proto_object = proto_column[rows]
        dst_object = dst_column[rows]
        src_object = src_column[rows]
        yes_no = zone_column[rows]
        # defaultdict(list) creates the list on first access, so no
        # membership test is needed and every entry has the same shape:
        # one (app, svc, proto, dst, src) tuple per spreadsheet row.
        Decision_List[Zone_Name][yes_no].append(
            (app_object, svc_object, proto_object, dst_object, src_object)
        )
This allows me to then set the values as a set and append them as needed

Why using bulk_create on Django to insert data with foreign keys returns "property object is not callable"?

I'm using Django 2.17 with a SQLite 3.26 database and trying to insert data from a csv file. I was using the get_or_create method, but it was too slow, so I started trying to insert using bulk_create.
I have the following fields of Models being used:
class SENSOR_TEMPERATURA(models.Model):
ID_Sensor_Temperatura = models.AutoField(primary_key = True)
class VALOR_TEMPERATURA(models.Model):
ID_Valor_Temperatura = models.AutoField(primary_key = True)
ID_Sensor_Temperatura = models.ForeignKey(SENSOR_TEMPERATURA, on_delete = models.PROTECT, null = False, db_column = 'VATE_CD_ID_Sensor_Temperatura')
Data_De_Medição = models.DateTimeField(default = datetime.now(), null = False)
Valor = models.DecimalField(default = 0, null = False, max_digits = 30, decimal_places = 15)
The code that I'm trying to run is:
print (datetime.datetime.now())
reader = csv.reader(f)
insert_CSV = []
count = 1
for row in reader:
insert_CSV.append([
VALOR_TEMPERATURA.pk(ID_Valor_Temperatura = count),
VALOR_TEMPERATURA(Data_De_Medição = datetime.datetime.strptime(row[0] + " UTC-0300",'%d/%m/%Y %H:%M:%S %Z%z')),
VALOR_TEMPERATURA(Valor = float(row[1])),
VALOR_TEMPERATURA(ID_Sensor_Temperatura = SENSOR_TEMPERATURA.objects.get(ID_Sensor_Temperatura = 4))
])
count = count + 1
print (datetime.datetime.now())
VALOR_TEMPERATURA.objects.bulk_create(insert_CSV)
print (datetime.datetime.now())
The part that I think is causing the trouble is "ID_Sensor_Temperatura = SENSOR_TEMPERATURA.objects.get(ID_Sensor_Temperatura = 4))", but it is exactly how I defined the Foreign Key when using get_or_create, so I can't figure out what the problem is.
I'm getting the following error:
6 for row in reader:
7 insert_CSV.append([
8 VALOR_TEMPERATURA.pk(VATE_CD_ID_Valor_Temperatura = count),
9 VALOR_TEMPERATURA(VATE_DF_Data_De_Medição = datetime.datetime.strptime(row[0] + " UTC-0300",'%d/%m/%Y %H:%M:%S %Z%z')),
10 VALOR_TEMPERATURA(VATE_VL_Valor = float(row[1])),
TypeError: 'property' object is not callable
What may be the problem?
This isn't how you write Python. You need to create an instance of the object, passing it the values.
# bulk_create() expects a flat list of model instances, so build ONE
# VALOR_TEMPERATURA per CSV row with all of its field values passed as
# keyword arguments.
insert_CSV.append(
    VALOR_TEMPERATURA(
        ID_Valor_Temperatura=count,
        Data_De_Medição=datetime.datetime.strptime(row[0] + " UTC-0300", '%d/%m/%Y %H:%M:%S %Z%z'),
        Valor=float(row[1]),
        # NOTE: this lookup hits the database once per row; if the sensor id
        # is constant, fetch the sensor once before the loop and reuse it.
        ID_Sensor_Temperatura=SENSOR_TEMPERATURA.objects.get(ID_Sensor_Temperatura=4)
    )
)
Note also, your models should not be defined in ALL_CAPS, as they are not constants. They should be called ValorTemperatura and SensorTemperatura.

TypeError: 'DataFrame' object is not callable python function

I have two functions, one which creates a dataframe from a csv and another which manipulates that dataframe. There is no problem the first time I pass the raw data through the lsc_age(import_data()) functions. However, I get the above-referenced error (TypeError: 'DataFrame' object is not callable) upon second+ attempts. Any ideas for how to solve the problem?
def import_data(csv,date1,date2):
global data
data = pd.read_csv(csv,header=1)
data = data.iloc[:,[0,1,4,6,7,8,9,11]]
data = data.dropna(how='all')
data = data.rename(columns={"National: For Dates 9//1//"+date1+" - 8//31//"+date2:'event','Unnamed: 1':'time','Unnamed: 4':'points',\
'Unnamed: 6':'name','Unnamed: 7':'age','Unnamed: 8':'lsc','Unnamed: 9':'club','Unnamed: 11':'date'})
data = data.reset_index().drop('index',axis=1)
data = data[data.time!='Time']
data = data[data.points!='Power ']
data = data[data['event']!="National: For Dates 9//1//"+date1+" - 8//31//"+date2]
data = data[data['event']!='USA Swimming, Inc.']
data = data.reset_index().drop('index',axis=1)
for i in range(len(data)):
if len(str(data['event'][i])) <= 3:
data['event'][i] = data['event'][i-1]
else:
data['event'][i] = data['event'][i]
data = data.dropna()
age = []
event = []
gender = []
for row in data.event:
gender.append(row.split(' ')[0])
if row[:9]=='Female 10':
n = 4
groups = row.split(' ')
age.append(' '.join(groups[1:n]))
event.append(' '.join(groups[n:]))
elif row[:7]=='Male 10':
n = 4
groups = row.split(' ')
age.append(' '.join(groups[1:n]))
event.append(' '.join(groups[n:]))
else:
n = 2
groups = row.split(' ')
event.append(' '.join(groups[n:]))
groups = row.split(' ')
age.append(groups[1])
data['age_group'] = age
data['event_simp'] = event
data['gender'] = gender
data['year'] = date2
return data
def lsc_age(data_two):
global lsc, lsc_age, top, all_performers
lsc = pd.DataFrame(data_two['event'].groupby(data_two['lsc']).count()).reset_index().sort_values(by='event',ascending=False)
lsc_age = data_two.groupby(['year','age_group','lsc'])['event'].count().reset_index().sort_values(by=['age_group','event'],ascending=False)
top = pd.concat([lsc_age[lsc_age.age_group=='10 & under'].head(),lsc_age[lsc_age.age_group=='11-12'].head(),\
lsc_age[lsc_age.age_group=='13-14'].head(),lsc_age[lsc_age.age_group=='15-16'].head(),\
lsc_age[lsc_age.age_group=='17-18'].head()],ignore_index=True)
all_performers = pd.concat([lsc_age[lsc_age.age_group=='10 & under'],lsc_age[lsc_age.age_group=='11-12'],\
lsc_age[lsc_age.age_group=='13-14'],lsc_age[lsc_age.age_group=='15-16'],\
lsc_age[lsc_age.age_group=='17-18']],ignore_index=True)
all_performers = all_performers.rename(columns={'event':'no. top 100'})
all_performers['age_year_lsc'] = all_performers.age_group+' '+all_performers.year.astype(str)+' '+all_performers.lsc
return all_performers
years = [i for i in range(2008,2018)]
for i in range(len(years)-1):
lsc_age(import_data(str(years[i+1])+"national100.csv",\
str(years[i]),str(years[i+1])))
During the first call to your function lsc_age() in line
lsc_age = data_two.groupby(['year','age_group','lsc'])['event'].count().reset_index().sort_values(by=['age_group','event'],ascending=False)
you are overwriting your function object with a dataframe. This is happening since you imported the function object from the global namespace with
global lsc, lsc_age, top, all_performers
Functions in Python are objects. Please see more information about this here.
To solve your problem, try to avoid the global imports. They do not seem to be necessary. Try to pass your data around through the arguments of the function.

What Should Be In My Return?

I am using Python to parse an XML response from a SOAP web-service. The Customer returns about 40 values as you can see below. I would like to know if there is a way to make it so I only have to type one thing into my return statement and get all of the values returned? I tried to use for customer in doc.findall('.//Customer').itervalues() and that did not work as I believe that call is for dictionaries. Same results and reasoning behind .iteritems.
doc = ET.fromstring(response_xml)
for customer in doc.findall('.//Customer'):
customer_number = customer.findtext('CustomerNumber')
customer_first_name = customer.findtext('FirstName')
customer_last_name = customer.findtext('LastName')
customer_middle_name = customer.findtext('MiddleName')
customer_salutation = customer.findtext('Salutation')
customer_gender = customer.findtext('Gender')
customer_language = customer.findtext('Language')
customer_address1 = customer.findtext('Address1')
customer_address2 = customer.findtext('Address2')
customer_address3 = customer.findtext('Address3')
customer_city = customer.findtext('City')
customer_county = customer.findtext('County')
customer_state_code = customer.findtext('StateCode')
customer_zip_code = customer.findtext('ZipCode')
customer_phone_number = customer.findtext('PhoneNumber')
customer_business_phone = customer.findtext('BusinessPhone')
customer_business_ext = customer.findtext('BusinessExt')
customer_fax_number = customer.findtext('FaxNumber')
customer_birth_date = customer.findtext('BirthDate')
customer_drivers_license = customer.findtext('DriversLicense')
customer_contact = customer.findtext('Contact')
customer_preferred_contact = customer.findtext('PreferredContact')
customer_mail_code = customer.findtext('MailCode')
customer_tax_exempt_Number = customer.findtext('TaxExmptNumber')
customer_assigned_salesperson = customer.findtext('AssignedSalesperson')
customer_type = customer.findtext('CustomerType')
customer_preferred_phone = customer.findtext('PreferredPhone')
customer_cell_phone = customer.findtext('CellPhone')
customer_page_phone = customer.findtext('PagePhone')
customer_other_phone = customer.findtext('OtherPhone')
customer_other_phone_desc = customer.findtext('OtherPhoneDesc')
customer_email1 = customer.findtext('Email1')
customer_email2 = customer.findtext('Email2')
customer_optional_field = customer.findtext('OptionalField')
customer_allow_contact_postal = customer.findtext('AllowContactByPostal')
customer_allow_contact_phone = customer.findtext('AllowContactByPhone')
customer_allow_contact_email = customer.findtext('AllowContactByEmail')
customer_business_phone_ext = customer.findtext('BusinessPhoneExtension')
customer_internatinol_bus_phone = customer.findtext('InternationalBusinessPhone')
customer_international_cell = customer.findtext('InternationalCellPhone')
customer_external_x_reference_key = customer.findtext('ExternalCrossReferenceKey')
customer_international_fax = customer.findtext('InternationalFaxNumber')
customer_international_other_phone = customer.findtext('InternationalOtherPhone')
customer_international_home_phone = customer.findtext('InternationalHomePhone')
customer_preferred_name = customer.findtext('CustomerPreferredName')
customer_international_pager = customer.findtext('InternationalPagerPhone')
customer_preferred_lang = customer.findtext('PreferredLanguage')
customer_last_change_date = customer.findtext('LastChangeDate')
customer_vehicles = customer.findtext('Vehicles')
customer_ccid = customer.findtext('CCID')
customer_cccd = customer.findtext('CCCD')
webservice.close()
return
I would write that as a generator function yielding dicts where the key matches the findtext argument, e.g.:
fields = ['CustomerNumber', 'FirstName', 'LastName',
# ...
]
for customer in doc.findall('.//Customer'):
yield dict((f, customer.findtext(f)) for f in fields)
You either want to return a list of dicts:
customers = []
for customer in doc.findall('.//Customer'):
customer_dict = {}
customer_dict['number'] = customer.findtext('CustomerNumber')
customer_dict['first_name'] = customer.findtext('FirstName')
customer_dict['last_name'] = customer.findtext('LastName')
# ad nauseum
customers.append(customer_dict)
webservice.close()
return customers
Or you make a Customer class that handles this, and you return a list of customer instances.
I would use a dictionary of dictionaries:
doc = ET.fromstring(response_xml)
customers = {}
cust_dict = {}
for customer in doc.findall('.//Customer'):
cust_dict['customer_number'] = customer.findtext('CustomerNumber')
cust_dict['customer_first_name'] = customer.findtext('FirstName')
cust_dict['customer_last_name'] = customer.findtext('LastName')
snip snip...
customers[customer_number] = cust_dict # or whatever property you want to use to identify each customer, I'm assuming customer_number is some sort of id number
webservice.close()
return customers
That is if you don't have a class you can use to create a Customer object.

Categories

Resources