I have a pandas DataFrame, something like the one shown below.
I would like to format the column "Pass/Fail" so that Fail gets a red background and everything else a green background, like:
I have tried to use pandas to do the formatting, but it fails to add any color to the Excel file. Following is the code:
writer = pandas.ExcelWriter(destination, engine='xlsxwriter')
color = Answer.style.applymap(lambda x: 'color: red' if x == "Fail" else 'color: green',
                              subset=pandas.IndexSlice[:, ['Pass/Fail']])
color.to_excel(writer, 'sheet1')
I also tried StyleFrame, but it failed to install; it seems StyleFrame is not compatible with my Python version (3.6).
How can I format the Excel file the way I want?
You can use XlsxWriter's conditional_format:
import pandas as pd

df = pd.DataFrame({'Pass/Fail':['Pass','Fail','Fail'],
                   'expect':[1,2,3]})
print (df)
  Pass/Fail  expect
0      Pass       1
1      Fail       2
2      Fail       3
writer = pd.ExcelWriter('pandas_conditional.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
red_format = workbook.add_format({'bg_color':'red'})
green_format = workbook.add_format({'bg_color':'green'})
worksheet.conditional_format('B2:B4', {'type': 'text',
                                       'criteria': 'containing',
                                       'value': 'Fail',
                                       'format': red_format})

worksheet.conditional_format('B2:B4', {'type': 'text',
                                       'criteria': 'containing',
                                       'value': 'Pass',
                                       'format': green_format})
writer.save()
A more dynamic solution uses get_loc to find the position of the column and a dictionary to map that position to the Excel column letter:
import string
import pandas as pd

df = pd.DataFrame({'Pass/Fail':['Pass','Fail','Fail'],
                   'expect':[1,2,3]})
print (df)
  Pass/Fail  expect
0      Pass       1
1      Fail       2
2      Fail       3
writer = pd.ExcelWriter('pandas_conditional.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
red_format = workbook.add_format({'bg_color':'red'})
green_format = workbook.add_format({'bg_color':'green'})
#dict for mapping column positions to Excel column letters; column A holds the index, so omit it
d = dict(zip(range(25), list(string.ascii_uppercase)[1:]))
print (d)
{0: 'B', 1: 'C', 2: 'D', 3: 'E', 4: 'F', 5: 'G', 6: 'H', 7: 'I', 8: 'J',
9: 'K', 10: 'L', 11: 'M', 12: 'N', 13: 'O', 14: 'P', 15: 'Q', 16: 'R',
17: 'S', 18: 'T', 19: 'U', 20: 'V', 21: 'W', 22: 'X', 23: 'Y', 24: 'Z'}
#set column for formatting
col = 'Pass/Fail'
excel_header = str(d[df.columns.get_loc(col)])
#get length of df
len_df = str(len(df.index) + 1)
rng = excel_header + '2:' + excel_header + len_df
print (rng)
B2:B4
worksheet.conditional_format(rng, {'type': 'text',
                                   'criteria': 'containing',
                                   'value': 'Fail',
                                   'format': red_format})

worksheet.conditional_format(rng, {'type': 'text',
                                   'criteria': 'containing',
                                   'value': 'Pass',
                                   'format': green_format})
writer.save()
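As a side note, XlsxWriter also ships a small utility for building A1-style ranges, so a sketch like the following could replace the manual letter mapping (assuming the same df as above; xl_range takes zero-indexed first_row, first_col, last_row, last_col):
from xlsxwriter.utility import xl_range

#column A holds the index, hence the +1; data starts in Excel row 2 (zero-indexed row 1)
col_idx = df.columns.get_loc('Pass/Fail') + 1
rng = xl_range(1, col_idx, len(df.index), col_idx)
print (rng)
B2:B4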
EDIT1:
Thanks to jmcnamara for the comment and for XlsxWriter. Using row/column notation avoids the letter mapping entirely:
col = 'Pass/Fail'
loc = df.columns.get_loc(col) + 1
len_df = len(df.index) + 1
worksheet.conditional_format(1, loc, len_df, loc, {'type': 'text',
                                                   'criteria': 'containing',
                                                   'value': 'Fail',
                                                   'format': red_format})

worksheet.conditional_format(1, loc, len_df, loc, {'type': 'text',
                                                   'criteria': 'containing',
                                                   'value': 'Pass',
                                                   'format': green_format})
writer.save()
EDIT:
Another solution with a newer version of pandas (0.20.1) and styles:
import pandas as pd
import numpy as np

df = pd.DataFrame({'Pass/Fail':['Pass','Fail','Fail'],
                   'expect':['d','f','g']})
print (df)
  Pass/Fail expect
0      Pass      d
1      Fail      f
2      Fail      g
def f(x):
    #build a same-shaped DataFrame of CSS strings; only the Pass/Fail column gets a color
    col = 'Pass/Fail'
    r = 'background-color: red'
    g = 'background-color: green'
    c = np.where(x[col] == 'Pass', g, r)
    y = pd.DataFrame('', index=x.index, columns=x.columns)
    y[col] = c
    return y
styled = df.style.apply(f, axis=None)
styled.to_excel('styled.xlsx', engine='openpyxl')
Disclaimer: I wrote the following library
I'd like to suggest using StyleFrame:
import pandas as pd
from StyleFrame import StyleFrame, Styler
df = pd.DataFrame({'Pass/Fail':['Pass','Fail','Fail'],
                   'expect':[1,2,3]})
sf = StyleFrame(df)

sf.apply_style_by_indexes(sf[sf['Pass/Fail'] == 'Pass'], cols_to_style='Pass/Fail',
                          styler_obj=Styler(bg_color='green'))
sf.apply_style_by_indexes(sf[sf['Pass/Fail'] == 'Fail'], cols_to_style='Pass/Fail',
                          styler_obj=Styler(bg_color='red'))
sf.to_excel('test.xlsx').save()
Since it bridges the gap between pandas and openpyxl, the styling is done at the DataFrame level instead of the worksheet level (so, for example, you don't need to know that the relevant cell range is B2:B4 or mess with indexes).
The code above produces an Excel file with the Pass/Fail column colored as requested.
EDIT: Just saw you mentioned you've tried to install but got an error. Can you edit your question and include the error?
If you have one or more columns with more than two values to format, and want to apply multiple format rules at once, you can do the following:
def fmt(data, fmt_dict):
    return data.replace(fmt_dict)

styled = df.style.apply(fmt, fmt_dict=fmt_dict, subset=['Test_1', 'Test_2'])
styled.to_excel('styled.xlsx', engine='openpyxl')
Above, fmt_dict is a dictionary that maps each cell value to the corresponding format:
fmt_dict = {
    'Pass': 'background-color: green',
    'Fail': 'background-color: red',
    'Pending': 'background-color: yellow; border-style: solid; border-color: blue; color: red',
}
Notice that for the 'Pending' value, you can also specify multiple format rules (e.g. border, background color, foreground color).
(Requires: openpyxl and jinja2)
Here is a full running example:
import pandas as pd

df = pd.DataFrame({'Test_1':['Pass','Fail', 'Pending', 'Fail'],
                   'expect':['d','f','g', 'h'],
                   'Test_2':['Pass','Pending', 'Pass', 'Fail'],
                   })

fmt_dict = {
    'Pass': 'background-color: green',
    'Fail': 'background-color: red',
    'Pending': 'background-color: yellow; border-style: solid; border-color: blue; color: red',
}

def fmt(data, fmt_dict):
    return data.replace(fmt_dict)

styled = df.style.apply(fmt, fmt_dict=fmt_dict, subset=['Test_1', 'Test_2'])
styled.to_excel('styled.xlsx', engine='openpyxl')
Assume there is a list of sublists like this:
[[2013, 'Patric', 'M', 1356], [2013, 'Helena', 'F', 202], [2013, 'Patric', 'F', 6], [1993, 'Patric', 'F', 7] ......]
which is the output of def list_of_names(), where 2013 is the year, M is the gender and 1356 is the number of male births, etc.
I want to create a dictionary that has the name as its key and a list of tuples (year, number_of_males, number_of_females) as its value. So for example:
{ ... 'Patric': [..., (1993, 0, 7), (2013, 1356, 6), ...], ... }
Here 1993 is the year, 0 is the number of males and 7 is the number of females, and the tuples should be arranged in order of the years.
I'm stuck on how to add this info into a dictionary:
def name_Index(names):
    d = dict()
    L = readNames()  #the list from the previous def, which outputs the names and info as above
    newlist = []
    for sublist in L:
from collections import defaultdict

def list_of_names():
    return [[2013, 'Patric', 'M', 1356],
            [2013, 'Helena', 'F', 202],
            [2013, 'Patric', 'F', 6],
            [1993, 'Patric', 'F', 7]]

def name_Index():
    # name -> year -> [males, females]
    tmp = defaultdict(lambda: defaultdict(lambda: [0, 0]))
    for year, name, sex, N in list_of_names():
        i = 0 if sex == 'M' else 1
        tmp[name][year][i] += N
    d = {}
    for name, entries in tmp.items():
        d[name] = [(year, M, F) for (year, (M, F)) in entries.items()]
    return d

print(name_Index())
This was my attempt at the problem:
from collections import defaultdict, namedtuple
from itertools import groupby

data = [[2013, 'Patric', 'M', 1356],
        [2013, 'Helena', 'F', 202],
        [2013, 'Patric', 'F', 6],
        [1993, 'Patric', 'F', 7]]

names = defaultdict(list)
datum = namedtuple('datum', 'year gender number')

for k, g in groupby(data, key=lambda x: x[1]):
    for l in g:
        year, name, gender, number = l
        names[k].append(datum(year, gender, number))

final_dict = defaultdict(list)
for n in names:
    for k, g in groupby(names[n], lambda x: x.year):
        males = 0
        females = 0
        for l in g:
            if l.gender == 'M':
                males += l.number
            elif l.gender == 'F':
                females += l.number
        final_dict[n].append((k, males, females))

print(final_dict)
The most convenient approach is to use collections.defaultdict. It returns a dictionary-like object that supplies a default value whenever a key is missing. In your case you would use a list as the default value and, in your loop, append tuples to it:
from collections import defaultdict

names = [[2013, 'Patric', 'M', 1356],
         [2013, 'Helena', 'F', 202],
         [2013, 'Patric', 'F', 6],
         [1993, 'Patric', 'F', 7]]

def name_Index(data):
    # name => year => sex
    d = defaultdict(lambda: defaultdict(lambda: {'F': 0, 'M': 0}))
    for year, name, sex, births in data:
        d[name][year][sex] += births
    # if you are fine with a defaultdict result: return d
    # else collect the results into tuples:
    result = {}
    for name, data in d.items():
        result[name] = [(year, c['M'], c['F']) for year, c in data.items()]
    return result

print(name_Index(names))
# {'Helena': [(2013, 0, 202)], 'Patric': [(1993, 0, 7), (2013, 1356, 6)]}
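As a minimal standalone sketch of the defaultdict behaviour described above (the variable name here is just illustrative):
from collections import defaultdict

# a missing key gets a fresh empty list automatically, so you can append right away
births_by_name = defaultdict(list)
births_by_name['Patric'].append((2013, 1356, 6))   # no KeyError for the unseen key
births_by_name['Patric'].append((1993, 0, 7))
print(births_by_name['Patric'])
# [(2013, 1356, 6), (1993, 0, 7)]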
I didn't understand why you take names as an argument of the name_Index function and then call readNames; there must be some reason for that in your code. So I just put in a dummy readNames function and passed None as the argument to name_Index. Using classes is a good technique for modelling complicated data structures. By the way, nicely written question, I must admit.
def readNames():
    return [[2013, 'Patric', 'M', 1356],
            [2013, 'Helena', 'F', 202],
            [2013, 'Patric', 'F', 6],
            [1993, 'Patric', 'F', 7]]

class YearOb(object):
    def __init__(self):
        self.male = 0
        self.female = 0

    def add_birth_data(self, gender, birth_count):
        if gender == "M":
            self.male += birth_count
        else:
            self.female += birth_count

class NameOb(object):
    def __init__(self):
        self.yearobs = dict()

    def add_record(self, year, gender, birth_count):
        if year not in self.yearobs:
            self.yearobs[year] = YearOb()
        self.yearobs[year].add_birth_data(gender, birth_count)

    def get_as_list(self):
        list_data = []
        for year, yearob in self.yearobs.items():
            list_data.append((year, yearob.male, yearob.female))
        return list_data

def name_Index(names):
    d = dict()
    L = readNames()  #the list from the previous def, which outputs the names and info as above
    for sublist in L:
        name = sublist[1]
        if name not in d:
            d[name] = NameOb()
        d[name].add_record(sublist[0], sublist[2], sublist[3])
    for name, nameob in d.items():
        d[name] = nameob.get_as_list()
    return d

print(name_Index(None))
I need to read the following data out of a text file:
[L02]
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,w,g,g
g,g,g,g,g,g,g,g,w,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,g,g,g,g,g,g
g,g,g,g,g,g,w,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
[L01]
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
I can read a single block as a CSV file, but I don't know how to read each block into a separate list.
The output I want is an array/list for each block, with the block contents as the list elements. Any ideas?
Here's a script that demonstrates how to break the problem down into reusable steps (functions) and performs the transformation you need.
import itertools
import operator
import re
import csv
import pprint

class TaggedLine(str):
    """
    Override str to allow a tag to be added.
    """
    def __new__(cls, val, tag):
        return str.__new__(cls, val)

    def __init__(self, val, tag):
        super(TaggedLine, self).__init__(val)
        self.tag = tag

def sections(stream):
    """
    Tag each line of the stream with its [section] (or None)
    """
    section_pattern = re.compile(r'\[(.*)\]')
    section = None
    for line in stream:
        matcher = section_pattern.match(line)
        if matcher:
            section = matcher.group(1)
            continue
        yield TaggedLine(line, section)

def splitter(stream):
    """
    Group each stream into sections
    """
    return itertools.groupby(sections(stream), operator.attrgetter('tag'))

def parsed_sections(stream):
    for section, lines in splitter(stream):
        yield section, list(csv.reader(lines))

if __name__ == '__main__':
    with open('data.csv') as stream:
        for section, data in parsed_sections(stream):
            print 'section', section
            pprint.pprint(data[:2])
Save your file as 'data.csv' and the script will run on your data with this output:
section L02
[['g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'w',
'w',
'w',
'w',
'g',
'g'],
['g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'w',
'w',
'w',
'w',
'w',
'g',
'g']]
section L01
[['d',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd'],
['d',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd']]
If you have NumPy, you could read the file into a NumPy array. comments='[' tells np.genfromtxt to ignore lines that begin with [, and the reshape call places each 16x16 block in its own "layer".
import numpy as np

arr = np.genfromtxt('data.csv', comments='[', delimiter=',', dtype=None)
arr = arr.reshape(-1, 16, 16)
You can access the nth layer with arr[n].
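For example, a brief indexing sketch, continuing from the arr built above (assuming the two-block data.csv shown in the question; on Python 3 / newer NumPy you may want to pass a string dtype or an encoding so the cells come back as text rather than bytes):
print(arr.shape)     # (2, 16, 16): one 16x16 layer per [L..] block
print(arr[0])        # the grid that followed the first header, [L02]
print(arr[1][0, 0])  # a single cell: row 0, column 0 of the second block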