Output to screen and CSV format in Python

I have a Python nested dictionary as output. I have been able to remove the first set of curly brackets using RocketDict, but 1) I can't remove the second set of curly brackets, and 2) when I try to export it to a CSV file with the column names given, it doesn't work because I can't figure out how to get the int#/# values that increment in the rows. For example, here was my initial output:
Before RocketDict:
{'intx/x': {'value1': 'A', 'value2': 'B', 'value3': 'C'},
 'inty/y': {'value1': 'X', 'value2': 'Y', 'value3': 'Z'}}
After the RocketDict:
intx/x : {'value1': 'A', 'value2': 'B', 'value3': 'C'},
inty/y : {'value1': 'X', 'value2': 'Y', 'value3': 'Z'}
Desired output:
intx/x : 'value1': 'A', 'value2': 'B', 'value3': 'C',
inty/y : 'value1': 'X', 'value2': 'Y', 'value3': 'Z'
Desired output to the csv:
Here is the full script:
import csv
import requests
from collections import UserDict

results = requests.get(url, headers=headers)
inventory = results.json()
data = inventory['config']

class RocketDict(UserDict):
    def __str__(self):
        r = ['']
        r.extend(['\t{} : {}'.format(k, v)
                  for k, v in self.items()])
        return ',\n'.join(r)

if __name__ == '__main__':
    # standard dict object
    # inventory = {('key-%02d' % v): v for v in range(1, 10)}
    # print(inventory, '\n')

    # Wrap that dict object into a RocketDict.
    d2 = RocketDict(data)
    print(d2)

    csv_columns = ['value1', 'value2', 'value3']
    dict_data = d2
    csv_file = 'mycsv.csv'
    try:
        with open(csv_file, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for data in dict_data:
                writer.writerow(d2)
    except IOError:
        print("I/O error")

Use pandas:
import pandas as pd

RocketDict = {'intx/x': {'value1': 'A', 'value2': 'B', 'value3': 'C'},
              'inty/y': {'value1': 'X', 'value2': 'Y', 'value3': 'Z'}}

pd.DataFrame(RocketDict).transpose().to_csv('out.csv', index=True)
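If you want to stay with the csv module instead, here is a minimal sketch (assuming the nested shape shown above, and using a hypothetical extra column named 'key' for the int#/# values). Iterating over items() is what puts each top-level key into its own row:
import csv

data = {'intx/x': {'value1': 'A', 'value2': 'B', 'value3': 'C'},
        'inty/y': {'value1': 'X', 'value2': 'Y', 'value3': 'Z'}}

csv_columns = ['key', 'value1', 'value2', 'value3']
with open('mycsv.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
    writer.writeheader()
    for key, row in data.items():             # items(), not just the keys
        writer.writerow({'key': key, **row})  # write the key as its own column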

Related

Row comparison and append loop by columns

I have a bunch of school data that I maintain on a master list of monthly testing scores. Every time a child takes a test and there is an update to 'Age', 'Score', or 'School', I insert a new row with the updated data so I can keep track of all the changes. I am trying to figure out a Python script to do this, but since I am a newbie I keep running into issues.
I tried writing a loop but keep getting errors, including "False", "The truth value of a Series is ambiguous", and "tuple indices must be integers, not str".
master_df = pd.DataFrame({'ID': ['A', 'B', 'C', 'D'],
                          'Age': [15, 14, 17, 13],
                          'School': ['AB', 'CD', 'EF', 'GH'],
                          'Score': [80, 75, 62, 100],
                          'Date': ['3/1/2019', '3/1/2019', '3/1/2019', '3/1/2019']})
updates_df = pd.DataFrame({'ID': ['A', 'B', 'C', 'D'],
                           'Age': [16, 14, 17, 13],
                           'School': ['AB', 'ZX', 'EF', 'GH'],
                           'Score': [80, 90, 62, 100],
                           'Date': ['4/1/2019', '4/1/2019', '4/1/2019', '4/1/2019']})
# What I am trying to get is:
updated_master = pd.DataFrame({'ID': ['A', 'A', 'B', 'B', 'C', 'D'],
                               'Age': [15, 16, 14, 14, 17, 13],
                               'School': ['AB', 'AB', 'CD', 'ZX', 'EF', 'GH'],
                               'Score': [80, 80, 75, 90, 62, 100],
                               'Date': ['3/1/2019', '4/1/2019', '3/1/2019', '4/1/2019', '3/1/2019', '3/1/2019']})

temp_delta_list = []
m_score = master_df.iloc[1:, master_df.columns.get_loc('Score')]
m_age = master_df.iloc[1:, master_df.columns.get_loc('Age')]
m_school = master_df.iloc[1:, master_df.columns.get_loc('School')]
u_score = updates_df.iloc[1:, updates_df.columns.get_loc('Score')]
u_age = updates_df.iloc[1:, updates_df.columns.get_loc('Age')]
u_school = updates_df.iloc[1:, updates_df.columns.get_loc('School')]

for i in updates_df['ID'].values:
    updated_temp_score = updates_df[updates_df['ID'] == i], u_score
    updated_temp_age = updates_df[updates_df['ID'] == i], u_age
    updated_temp_school = updates_df[updates_df['ID'] == i], u_school
    master_temp_score = master_df[master_df['ID'] == i], m_score
    master_temp_age = master_df[master_df['ID'] == i], m_age
    master_temp_school = updates_df[master_df['ID'] == i], m_school
    if (updated_temp_score == master_temp_score) | (updated_temp_age == master_temp_age) | (updated_temp_school == master_temp_school):
        pass
    else:
        temp_deltas = updates_df[(updates_df['ID'] == i)]
        temp_delta_list.append(temp_deltas)
I ultimately want the loop to compare the row values for each ID, return the rows that have any difference, and then append them to master_df.
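One possible loop-free sketch, assuming the master_df and updates_df defined above and that only 'Age', 'School' and 'Score' (not 'Date') decide whether a row changed: after concatenation, an unchanged row appears twice on those columns, so keeping only the first occurrence leaves both versions of a changed row and a single master copy of an unchanged one.
import pandas as pd

combined = pd.concat([master_df, updates_df], ignore_index=True)
key_cols = ['ID', 'Age', 'School', 'Score']
# keep every row that is not an exact repeat of an earlier row on the key columns
updated_master = (combined[~combined.duplicated(subset=key_cols, keep='first')]
                  .sort_values(['ID', 'Date'])   # the dates compare as strings here
                  .reset_index(drop=True))
print(updated_master)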

Python - Using pandas to format excel cell

I have a pandas dataframe, which is something like shown below.
I would like to format the column "Pass/Fail" so that "Fail" gets a red background and everything else a green background, like:
I have tried to use pandas to do the formatting, but it fails to add the color to the Excel file. The following is the code:
writer = pandas.ExcelWriter(destination, engine='xlsxwriter')
color = Answer.style.applymap(lambda x: 'color: red' if x == "Fail" else 'color: green',
                              subset=pandas.IndexSlice[:, ['Pass/Fail']])
color.to_excel(writer, 'sheet1')
I tried StyleFrame, which failed to install; it seems that StyleFrame does not support my Python version (3.6).
How can I format the excel as I want?
You can use conditional_format:
df = pd.DataFrame({'Pass/Fail': ['Pass', 'Fail', 'Fail'],
                   'expect': [1, 2, 3]})
print (df)
  Pass/Fail  expect
0      Pass       1
1      Fail       2
2      Fail       3

writer = pd.ExcelWriter('pandas_conditional.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
red_format = workbook.add_format({'bg_color': 'red'})
green_format = workbook.add_format({'bg_color': 'green'})
worksheet.conditional_format('B2:B4', {'type': 'text',
                                       'criteria': 'containing',
                                       'value': 'Fail',
                                       'format': red_format})
worksheet.conditional_format('B2:B4', {'type': 'text',
                                       'criteria': 'containing',
                                       'value': 'Pass',
                                       'format': green_format})
writer.save()
A more dynamic solution uses get_loc for the position of the column and maps it to the Excel column letter with a dictionary:
import string
df = pd.DataFrame({'Pass/Fail': ['Pass', 'Fail', 'Fail'],
                   'expect': [1, 2, 3]})
print (df)
  Pass/Fail  expect
0      Pass       1
1      Fail       2
2      Fail       3
writer = pd.ExcelWriter('pandas_conditional.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
red_format = workbook.add_format({'bg_color':'red'})
green_format = workbook.add_format({'bg_color':'green'})
#dict for map excel header, first A is index, so omit it
d = dict(zip(range(25), list(string.ascii_uppercase)[1:]))
print (d)
{0: 'B', 1: 'C', 2: 'D', 3: 'E', 4: 'F', 5: 'G', 6: 'H', 7: 'I', 8: 'J',
9: 'K', 10: 'L', 11: 'M', 12: 'N', 13: 'O', 14: 'P', 15: 'Q', 16: 'R',
17: 'S', 18: 'T', 19: 'U', 20: 'V', 21: 'W', 22: 'X', 23: 'Y', 24: 'Z'}
#set column for formatting
col = 'Pass/Fail'
excel_header = str(d[df.columns.get_loc(col)])
#get length of df
len_df = str(len(df.index) + 1)
rng = excel_header + '2:' + excel_header + len_df
print (rng)
B2:B4
worksheet.conditional_format(rng, {'type': 'text',
                                   'criteria': 'containing',
                                   'value': 'Fail',
                                   'format': red_format})
worksheet.conditional_format(rng, {'type': 'text',
                                   'criteria': 'containing',
                                   'value': 'Pass',
                                   'format': green_format})
writer.save()
EDIT1:
Thanks to jmcnamara for the comment and for XlsxWriter:
col = 'Pass/Fail'
loc = df.columns.get_loc(col) + 1
len_df = len(df.index) + 1
worksheet.conditional_format(1, loc, len_df, loc, {'type': 'text',
                                                   'criteria': 'containing',
                                                   'value': 'Fail',
                                                   'format': red_format})
worksheet.conditional_format(1, loc, len_df, loc, {'type': 'text',
                                                   'criteria': 'containing',
                                                   'value': 'Pass',
                                                   'format': green_format})
writer.save()
EDIT:
Another solution, with the latest version of pandas (0.20.1) and styles:
df = pd.DataFrame({'Pass/Fail': ['Pass', 'Fail', 'Fail'],
                   'expect': ['d', 'f', 'g']})
print (df)
  Pass/Fail expect
0      Pass      d
1      Fail      f
2      Fail      g
import numpy as np

def f(x):
    col = 'Pass/Fail'
    r = 'background-color: red'
    g = 'background-color: green'
    c = np.where(x[col] == 'Pass', g, r)
    y = pd.DataFrame('', index=x.index, columns=x.columns)
    y[col] = c
    return y

styled = df.style.apply(f, axis=None)
styled.to_excel('styled.xlsx', engine='openpyxl')
Disclaimer: I wrote the following library
I'd like to suggest using StyleFrame:
import pandas as pd
from StyleFrame import StyleFrame, Styler
df = pd.DataFrame({'Pass/Fail': ['Pass', 'Fail', 'Fail'],
                   'expect': [1, 2, 3]})
sf = StyleFrame(df)
sf.apply_style_by_indexes(sf[sf['Pass/Fail'] == 'Pass'], cols_to_style='Pass/Fail',
                          styler_obj=Styler(bg_color='green'))
sf.apply_style_by_indexes(sf[sf['Pass/Fail'] == 'Fail'], cols_to_style='Pass/Fail',
                          styler_obj=Styler(bg_color='red'))
sf.to_excel('test.xlsx').save()
Since it bridges the gap between pandas and openpyxl, the styling is done at the dataframe level instead of the worksheet level (so, for example, you don't need to know that the relevant cell range is B2:B4 or mess with indexes).
The code above outputs the following:
EDIT: Just saw you mentioned you've tried to install but got an error. Can you edit your question and include the error?
If you have one or more columns and more than two values to format, and you want to apply multiple format rules at once, you can do the following:
def fmt(data, fmt_dict):
    return data.replace(fmt_dict)

styled = df.style.apply(fmt, fmt_dict=fmt_dict, subset=['Test_1', 'Test_2'])
styled.to_excel('styled.xlsx', engine='openpyxl')
Above, fmt_dict is a dictionary with the values mapped to the corresponding format:
fmt_dict = {
    'Pass': 'background-color: green',
    'Fail': 'background-color: red',
    'Pending': 'background-color: yellow; border-style: solid; border-color: blue; color: red',
}
Notice that for the 'Pending' value you can also specify multiple format rules (e.g. border, background color, foreground color).
(Requires: openpyxl and jinja2)
Here is a full running example:
import pandas as pd

df = pd.DataFrame({'Test_1': ['Pass', 'Fail', 'Pending', 'Fail'],
                   'expect': ['d', 'f', 'g', 'h'],
                   'Test_2': ['Pass', 'Pending', 'Pass', 'Fail'],
                   })

fmt_dict = {
    'Pass': 'background-color: green',
    'Fail': 'background-color: red',
    'Pending': 'background-color: yellow; border-style: solid; border-color: blue; color: red',
}

def fmt(data, fmt_dict):
    return data.replace(fmt_dict)

styled = df.style.apply(fmt, fmt_dict=fmt_dict, subset=['Test_1', 'Test_2'])
styled.to_excel('styled.xlsx', engine='openpyxl')

python preg_replace translate message to gsm formatted message

I am trying to make a replacement in order to send a GSM message later. I use the function below, which takes the message as a parameter and converts it to a GSM-formatted message.
import re

# staticmethod
def gsm_message(message):
    expressions = {
        '/[άΆαΑ]/u': 'A',
        '/[βΒ]/u': 'B',
        '/[έΈεΕ]/u': 'E',
        # ......... more
    }
    translated_message = re.sub(expressions.keys(), expressions.values(), message)
    print(translated_message)
The error I get when I try to print is: unhashable type: 'list'.
What can I do to make it work?
The following works in Python 3.
import re
def gsm_message(message):
    expressions = {
        'ά': 'A',
        'Ά': 'A',
        'α': 'A',
        'Α': 'A',
        'β': 'B',
        'Β': 'B',
        'έ': 'E',
        'Έ': 'E',
        'ε': 'E',
        'Ε': 'E',
    }
    pattern = re.compile('|'.join(expressions.keys()))
    translated_message = pattern.sub(lambda x: expressions[x.group()], message)
    print(translated_message)
message = "ββααέ"
gsm_message(message)
Gives:
BBAAE
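As a side note, when every key in the mapping is a single character (as it is here), a str.translate-based sketch avoids regular expressions entirely. This is only an alternative sketch with a hypothetical name, assuming the same Greek-to-GSM mapping:
GSM_MAP = str.maketrans({
    'ά': 'A', 'Ά': 'A', 'α': 'A', 'Α': 'A',
    'β': 'B', 'Β': 'B',
    'έ': 'E', 'Έ': 'E', 'ε': 'E', 'Ε': 'E',
})

def gsm_message_translate(message):
    # translate() replaces each mapped character in a single pass
    print(message.translate(GSM_MAP))

gsm_message_translate("ββααέ")  # BBAAE
If some keys were longer than one character, or contained regex metacharacters, the regex approach above with re.escape applied to each key would be the safer route.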

Adding list in form of tuples to a dictionary

Assuming there is a list with sublists like this
[[2013, 'Patric', 'M', 1356], [2013, 'Helena', 'F', 202], [2013, 'Patric', 'F', 6],[1993, 'Patric', 'F', 7]......]
which is the output of def list_of_names(), where 2013 is the year, 'M' is the gender and 1356 is the number of male births, etc.
I want to create a dictionary that uses the name as a key and the values as tuples (year, number_of_males, number_of_females). So, for example:
{ .. 'Patric': [... , (1993, 0, 7), (2013, 1356, 6), ... ], ... }
Technically 1993 is the year, 0 is the number of males and 7 is the number of females, and the tuples should be arranged in order of the years.
I'm stuck on how to add this information to a dictionary:
def name_Index(names):
    d = dict()
    L = readNames()  # the list from the previous def, which outputs names and info as above
    newlist = []
    for sublist in L:
from collections import defaultdict

def list_of_names():
    return [[2013, 'Patric', 'M', 1356],
            [2013, 'Helena', 'F', 202],
            [2013, 'Patric', 'F', 6],
            [1993, 'Patric', 'F', 7]]

def name_Index():
    tmp = defaultdict(lambda: defaultdict(lambda: [0, 0]))
    for year, name, sex, N in list_of_names():
        i = 0 if sex == 'M' else 1
        tmp[name][year][i] += N
    d = {}
    for name, entries in tmp.items():
        d[name] = [(year, M, F) for (year, (M, F)) in entries.items()]
    return d

print name_Index()
This was my attempt at the problem:
from collections import defaultdict, namedtuple
from itertools import groupby

data = [[2013, 'Patric', 'M', 1356],
        [2013, 'Helena', 'F', 202],
        [2013, 'Patric', 'F', 6],
        [1993, 'Patric', 'F', 7]]

names = defaultdict(list)
datum = namedtuple('datum', 'year gender number')

for k, g in groupby(data, key=lambda x: x[1]):
    for l in g:
        year, name, gender, number = l
        names[k].append(datum(year, gender, number))

final_dict = defaultdict(list)
for n in names:
    for k, g in groupby(names[n], lambda x: x.year):
        males = 0
        females = 0
        for l in g:
            if l.gender == 'M':
                males += l.number
            elif l.gender == 'F':
                females += l.number
        final_dict[n].append((k, males, females))

print(final_dict)
The most convenient approach is to use collections.defaultdict. It returns a dictionary-like object that returns a default value if it doesn't find a key. In your case, you use a list as the default value, and in your loop you append tuples to it:
from collections import defaultdict

names = [[2013, 'Patric', 'M', 1356],
         [2013, 'Helena', 'F', 202],
         [2013, 'Patric', 'F', 6],
         [1993, 'Patric', 'F', 7]]

def name_Index(data):
    # name => year => sex
    d = defaultdict(lambda: defaultdict(lambda: {'F': 0, 'M': 0}))
    for year, name, sex, births in data:
        d[name][year][sex] += births
    # if you are fine with the defaultdict result: return d
    # else collect results into tuples:
    result = {}
    for name, data in d.items():
        result[name] = [(year, c['M'], c['F']) for year, c in data.items()]
    return result

print name_Index(names)
# {'Helena': [(2013, 0, 202)], 'Patric': [(1993, 0, 7), (2013, 1356, 6)]}
I didn't understand why you are taking names as an argument of the name_Index function and then calling readNames; there must be some necessity in your work. Hence, I just put in a dummy readNames function and passed None as the argument to name_Index. Using a class is a good technique for solving complicated data structures. Btw, nicely written question, I must admit.
def readNames():
    return [[2013, 'Patric', 'M', 1356], [2013, 'Helena', 'F', 202], [2013, 'Patric', 'F', 6], [1993, 'Patric', 'F', 7]]

class YearOb(object):
    def __init__(self):
        self.male = 0
        self.female = 0

    def add_birth_data(self, gender, birth_count):
        if gender == "M":
            self.male += birth_count
        else:
            self.female += birth_count

class NameOb(object):
    def __init__(self):
        self.yearobs = dict()

    def add_record(self, year, gender, birth_count):
        if year not in self.yearobs:
            self.yearobs[year] = YearOb()
        self.yearobs[year].add_birth_data(gender, birth_count)

    def get_as_list(self):
        list_data = []
        for year, yearob in self.yearobs.items():
            list_data.append((year, yearob.male, yearob.female))
        return list_data

def name_Index(names):
    d = dict()
    L = readNames()  # the list from the previous def, which outputs names and info as above
    newlist = []
    for sublist in L:
        name = sublist[1]
        if name not in d:
            d[name] = NameOb()
        d[name].add_record(sublist[0], sublist[2], sublist[3])
    for name, nameob in d.items():
        d[name] = nameob.get_as_list()
    return d

print(name_Index(None))

read a multi tier csv file in python

I need to read the following data out of a text file;
[L02]
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,w,g,g
g,g,g,g,g,g,g,g,w,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,g,g,g,g,g,g
g,g,g,g,g,g,w,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
[L01]
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
I can read a single block as a CSV file, but I don't know how to read each block into a separate list.
The output I want is an array/list for each block, with the block contents as the list elements. Any ideas?
Here's a script that demonstrates how to break the problem down into reusable steps (functions) and performs the transformation you need.
import itertools
import operator
import re
import csv
import pprint

class TaggedLine(str):
    """
    Override str to allow a tag to be added.
    """
    def __new__(cls, val, tag):
        return str.__new__(cls, val)

    def __init__(self, val, tag):
        super(TaggedLine, self).__init__(val)
        self.tag = tag

def sections(stream):
    """
    Tag each line of the stream with its [section] (or None)
    """
    section_pattern = re.compile(r'\[(.*)\]')
    section = None
    for line in stream:
        matcher = section_pattern.match(line)
        if matcher:
            section = matcher.group(1)
            continue
        yield TaggedLine(line, section)

def splitter(stream):
    """
    Group each stream into sections
    """
    return itertools.groupby(sections(stream), operator.attrgetter('tag'))

def parsed_sections(stream):
    for section, lines in splitter(stream):
        yield section, list(csv.reader(lines))

if __name__ == '__main__':
    with open('data.csv') as stream:
        for section, data in parsed_sections(stream):
            print 'section', section
            pprint.pprint(data[:2])
Save your file as 'data.csv' and the script will run on your data with this output:
section L02
[['g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'w',
'w',
'w',
'w',
'g',
'g'],
['g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'w',
'w',
'w',
'w',
'w',
'g',
'g']]
section L01
[['d',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd'],
['d',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd']]
If you have numpy, you could read the file into a numpy array. comments='[' tells np.genfromtxt to ignore lines that begin with [. The reshape method places each 16x16 block in its own "layer".
import numpy as np

arr = np.genfromtxt('data.csv', comments='[', delimiter=',', dtype=None)
arr = arr.reshape(-1, 16, 16)
You can access the nth layer with arr[n].
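As a quick check, here is a sketch assuming the two 16x16 blocks shown above (note that on Python 3, genfromtxt may return the single-character cells as byte strings unless an encoding is passed):
print(arr.shape)      # (2, 16, 16): two blocks of 16 rows x 16 columns
print(arr[0][0][10])  # 'w' (or b'w') -- first block ([L02]), first row, column 10
print(arr[1][0][0])   # 'd' (or b'd') -- second block ([L01])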
