How to style pandas dataframe using for loop - python

I have a dataset in which I need to display different values in different colors. Only some of the cells need to be highlighted, not all of them.
Here are some of the colors:
dict_colors = {'a': 'red', 'b': 'blue','e':'tomato'}
How can I highlight all these cells with given colors?
MWE
# data
import pandas as pd
df = pd.DataFrame({'A': list('abcdef'), 'B': list('aabbcc'), 'C': list('aaabbb')})
# without for loop
(df.style
.apply(lambda dfx: ['background: red' if val == 'a' else '' for val in dfx], axis = 1)
.apply(lambda dfx: ['background: blue' if val == 'b' else '' for val in dfx], axis = 1)
)
# How to do this using for loop (I have so many values and different colors for them)
# My attempt
dict_colors = {'a': 'red', 'b': 'blue','e':'tomato'}
s = df.style
for key, color in dict_colors.items():
    # NOTE: this attempt does not give the intended result. Styler.apply only
    # registers the function and calls it when the table is rendered, and the
    # lambda looks up `key` and `color` at call time, so every registered
    # lambda sees the last pair of dict_colors. Binding the values as
    # defaults (lambda dfx, key=key, color=color: ...) would capture the
    # per-iteration pair instead.
    s = s.apply(lambda dfx: [f'background: {color}' if cell == key else '' for cell in dfx], axis = 1)
display(s)

You can try this:
import pandas as pd
df = pd.DataFrame({'A': list('abcdef'), 'B': list('aabbcc'), 'C': list('aaabbb')})
dict_colors = {'a': 'red', 'b': 'blue', 'e':'tomato'}
# create a Styler object for the DataFrame
s = df.style
def apply_color(val):
    """Return the CSS background rule for a single cell value.

    The value is looked up in the module-level ``dict_colors`` mapping.
    Values without an entry get an empty string, which leaves the cell
    unstyled.
    """
    if val in dict_colors:
        return f'background: {dict_colors[val]}'
    return ''
# apply the style to each cell
s = s.applymap(apply_color)
# display the styled DataFrame
display(s)

I found a way using the eval function. It is not the most elegant method, but it works.
import pandas as pd
df = pd.DataFrame({'A': list('abcdef'), 'B': list('aabbcc'), 'C': list('aaabbb')})
dict_colors = {'a': 'red', 'b': 'blue','e':'tomato'}
# Build the source text of one chained Styler expression, starting at df.style.
lst = ['df.style']
for key, color in dict_colors.items():
    # key and color are pasted into the text as string literals, so each
    # generated lambda carries its own pair.
    text = f".apply(lambda dfx: ['background: {color}' if cell == '{key}' else '' for cell in dfx], axis = 1)"
    lst.append(text)
s = ''.join(lst)
# NOTE: eval executes whatever the string contains. That is acceptable here
# only because dict_colors is a literal in this snippet; a key or color
# containing a quote character would break the generated expression.
display(eval(s))

Related

python pandas dataframe add colour to adjusted and inserted row

I have the following data-frame
import pandas as pd
df = pd.DataFrame()
df['number'] = (651,651,651,4267,4267,4267,4267,4267,4267,4267,8806,8806,8806,6841,6841,6841,6841)
df['name']=('Alex','Alex','Alex','Ankit','Ankit','Ankit','Ankit','Ankit','Ankit','Ankit','Abhishek','Abhishek','Abhishek','Blake','Blake','Blake','Blake')
df['hours']=(8.25,7.5,7.5,7.5,14,12,15,11,6.5,14,15,15,13.5,8,8,8,8)
df['loc']=('Nar','SCC','RSL','UNIT-C','UNIT-C','UNIT-C','UNIT-C','UNIT-C','UNIT-C','UNIT-C','UNI','UNI','UNI','UNKING','UNKING','UNKING','UNKING')
print(df)
If the running balance of an individual's hours reaches 38, the cell that reached the 38th hour is adjusted, a duplicate row is inserted, and the remaining hours are carried into that inserted row. The following code performs this, and the difference between the original data and the adjusted data can be seen in the output.
# Running total of 'hours' within each 'number' group.
s = df.groupby('number')['hours'].cumsum()
# True on rows whose running total is above 38.
m = s.gt(38)
# Per group, the index label of the first row above 38. idxmax returns the
# first maximum, so a group without any True still yields a label; the
# `& m` further down filters those rows out again.
idx = m.groupby(df['number']).idxmax()
# 38 minus the previous row's running total within the group, i.e. the hours
# still missing to reach 38. A group's first row has no previous total and
# falls back to its own running total.
delta = s.groupby(df['number']).shift().rsub(38).fillna(s)
# Repeat the crossing row of each group twice and every other row once.
out = df.loc[df.index.repeat((df.index.isin(idx)&m)+1)]
# First copy of a repeated row: only the hours that bring the total to 38.
out.loc[out.index.duplicated(keep='last'), 'hours'] = delta
# Second copy of a repeated row: the rest of the original entry's hours.
out.loc[out.index.duplicated(), 'hours'] -= delta
print(out)
I then output to csv with the following.
out.to_csv('Output.csv', index = False)
I need to have the row that got adjusted and the row that got inserted highlighted in a color (any color) when it is exported to csv.
UPDATE: as csv does not accept colours to output, any way to tag the adjusted and insert rows is acceptable
You can't add any kind of formatting, including colors, to a CSV. You can however color records in a dataframe.
# single-index:
# Load a dataset
import seaborn as sns
df = sns.load_dataset('planets')
# Now let's group the data
groups = df.groupby('method').mean()
groups
# Highlight the Maximum values
groups.style.highlight_max(color = 'lightgreen')
# multi-index:
import pandas as pd
df = pd.DataFrame([['one', 'A', 100,3], ['two', 'A', 101, 4],
['three', 'A', 102, 6], ['one', 'B', 103, 6],
['two', 'B', 104, 0], ['three', 'B', 105, 3]],
columns=['c1', 'c2', 'c3', 'c4']).set_index(['c1', 'c2']).sort_index()
print(df)
def highlight_min(data):
    """Return CSS strings that mark minimum values with a red background.

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        A Series is compared with its own minimum. A DataFrame is compared
        with the per-column minimum inside each group of index level 0.

    Returns
    -------
    list of str or pandas.DataFrame
        For a Series, a list with one CSS string per element. For a
        DataFrame, a frame of CSS strings with the same index and columns.
        Entries that are not a minimum are empty strings.
    """
    color = 'red'
    attr = 'background-color: {}'.format(color)
    if data.ndim == 1:  # Series from .apply(axis=0) or axis=1
        is_min = data == data.min()
        return [attr if v else '' for v in is_min]

    # Imported here because the surrounding snippet only imports pandas.
    import numpy as np

    is_min = data.groupby(level=0).transform('min') == data
    return pd.DataFrame(np.where(is_min, attr, ''),
                        index=data.index, columns=data.columns)
# Go through the Styler so the returned CSS strings are used as cell styles;
# DataFrame.apply would only hand them back as ordinary data.
df = df.style.apply(highlight_min, axis=0)
df

How to flag an outlier(s) /anomaly in selected columns in python?

In the dataset df below, I want to flag the anomalies in all columns except A, B, C and L.
Any value less than 1500 or greater than 400000 is regarded as an anomaly.
import pandas as pd
# initialise the data as a dict of lists
data = {
'A':['T1', 'T2', 'T3', 'T4', 'T5'],
'B':[1,1,1,1,1],
'C':[1,2,3,5,9],
'D':[12005, 18190, 1034, 15310, 31117],
'E':[11021, 19112, 19021, 12, 24509 ],
'F':[10022,19910, 19113,19999, 25519],
'G':[14029, 29100, 39022, 24509, 412262],
'H':[52119,32991,52883,69359,57835],
'J':[41218, 52991,55121,69152,79355],
'K': [43211,8199991,56881,212,77342],
'L': [1,0,1,0,0],
'M': [31211,42901,53818,62158,69325],
}
# Create DataFrame
df = pd.DataFrame(data)
# Print the output.
df
Attempt:
exclude_cols = ['A','B','C','L']
def flag_outliers(s, exclude_cols):
    """Return one CSS string per element of ``s``, red for outliers.

    Parameters
    ----------
    s : pandas.Series
        The row or column handed over by ``Styler.apply``.
    exclude_cols : list
        Series names that must stay unstyled.

    Returns
    -------
    list of str
        ``'background-color: red'`` for values below 1500 or above 400000,
        an empty string everywhere else. Values that cannot be read as
        numbers are never flagged.
    """
    if s.name in exclude_cols:
        # Styler.apply needs one entry per element, so "no styling" is a
        # list of empty strings rather than a single string.
        return [''] * len(s)
    s = pd.to_numeric(s, errors='coerce')
    indexes = (s < 1500) | (s > 400000)
    return ['background-color: red' if v else '' for v in indexes]
df.style.apply(lambda s: flag_outliers(s, exclude_cols), axis=1)
Result of the code:
Desired output should look like this:
Thanks for the effort!
If you set the subset as the argument of the apply function, you will get what you want.
exclude_cols = ['A','B','C','L']
def flag_outliers(s, exclude_cols):
    """Return one CSS string per element of ``s``, yellow for outliers.

    Parameters
    ----------
    s : pandas.Series
        The row or column handed over by ``Styler.apply``.
    exclude_cols : list
        Series names that must stay unstyled.

    Returns
    -------
    list of str
        ``'background-color: yellow'`` for values below 1500 or above
        400000, an empty string everywhere else. Values that cannot be read
        as numbers are never flagged.
    """
    if s.name in exclude_cols:
        # Styler.apply needs one entry per element, so "no styling" is a
        # list of empty strings rather than a single string.
        return [''] * len(s)
    s = pd.to_numeric(s, errors='coerce')
    indexes = (s < 1500) | (s > 400000)
    return ['background-color: yellow' if v else '' for v in indexes]
# Style every column that is not listed in exclude_cols.
styled_cols = [col for col in df.columns if col not in exclude_cols]
df.style.apply(lambda s: flag_outliers(s, exclude_cols), axis=1, subset=styled_cols)

Why does the export of a styled pandas dataframe to Excel not work?

For each PEOPLE instance, I would like to apply the same background color to the cells that contain its name and its related name. I have tried df.style.applymap; it does not raise an error, but it does not seem to work. Does anyone have any idea why? Thank you.
clrs = list(mcolors.CSS4_COLORS.keys())
# NOTE(review): indentation was lost in this snippet, so the nesting of the
# lines below cannot be recovered with certainty.
for k in range(len(PEOPLE)):
if PEOPLE[k].attribute == 'child':
df1_data = [PEOPLE[k].name, PEOPLE[k].related]
# NOTE(review): applymap is handed a list wrapping the lambda rather than the
# lambda itself, and the Styler it returns is not kept.
df.style.applymap([lambda x: 'background-color: yellow' if x in df1_data else 'background-color: red'])
# NOTE(review): this exports df itself, not a Styler, so no styling can reach
# the file.
df.to_excel('styledz.xlsx', engine='openpyxl')
Here is some more info on df.style. Here I'm using some simple example because I don't have your data available:
import pandas as pd
import numpy as np
# Ten random rows: two integer columns plus a boolean 'late' flag. The
# builtin bool is used as dtype because the np.bool alias is not available
# on every NumPy release.
df = pd.DataFrame({
    'a': np.random.randint(0, 10, 10),
    'b': np.random.randint(0, 10, 10),
    'late': np.random.choice([0, 1], 10).astype(bool),
})
def highlight_late(s):
    """Colour a whole row by its 'late' entry.

    ``s`` is one row of the frame (``Styler.apply`` with ``axis=1``). Every
    cell of the row gets a red background when ``s['late']`` is truthy and
    a green one otherwise.
    """
    # The colour depends only on the row's flag, so compute it once.
    colour = 'background-color: red' if s['late'] else 'background-color: green'
    return [colour] * len(s)
df = df.style.apply(highlight_late, axis=1)
df.to_excel('style.xlsx', engine='openpyxl')
In the Excel file it looks like this:
For cell based coloring use:
def highlight_late(s):
    """Colour each element of ``s`` by its own truthiness.

    Returns one CSS string per element: a red background for truthy values
    and a green background for falsy ones.
    """
    return ['background-color: red' if flag else 'background-color: green' for flag in s]
df = df.style.apply(highlight_late, subset=["late"], axis=1)
This gives you:
Basically your solution will be a modification of the following:
# Small demo frame: one row per person with a numeric attribute.
df = pd.DataFrame([['mark', 2], ['mike', 4], ['manny', 6]], columns=['name', 'attribute'])
def style_row(row, people):
    """Return the CSS strings for one row, marking 'attribute' for known people.

    Parameters
    ----------
    row : pandas.Series
        One row of the frame (``Styler.apply`` with ``axis=1``); it must
        contain a 'name' entry.
    people : collection
        Names whose 'attribute' cell gets a red background.

    Returns
    -------
    pandas.Series
        Indexed like ``row``; every entry is an empty string except
        'attribute' when ``row['name']`` is in ``people``.
    """
    output = pd.Series("", index=row.index)
    if row["name"] in people:
        output['attribute'] = "background-color:red;"
    return output
styler = df.style.apply(style_row, axis=1, people=['mark', 'manny'])
styler

Dataframe styling in Jupyter Notebook

I have a pandas dataframe to which I applied cell coloring based on the values in a second dataframe. (The 2 dataframes are the same size). I did this based on this SO answer shown here:
Now that I've colored the dataframe, the cell outlines have disappeared. I saw a suggestion to use the following to add cell outlines:
df_navigator = df_navigator.data.style.set_properties(**{'text-align': 'left','border-color':'Black','border-width':'thin','border-style':'dotted'})
If I do that, then the cell coloring disappears.
How can I keep the custom cell coloring while adding the black borders back in to the displayed dataframe?
Adding the full code for reproducing:
import pandas as pd
import numpy as np
df = pd.DataFrame({
'A': 'foo bar foo'.split(),
'B': 'one one two'.split(),
'C': np.arange(3),
'D': np.arange(3) * 2
})
j = [
{ 'bgcolor': '#55aa2a'},
{ 'bgcolor': '#d42a2a'},
{ 'bgcolor': '#d42a2a'},
]
df2 = pd.DataFrame({
'E': j,
'F': j,
'G': j,
'H': j
})
df2 = df2.applymap(lambda x: 'background-color: {}'.format(x.get('bgcolor')))
def highlight(x):
    """Return the CSS strings held in ``df2`` labelled like ``x``.

    ``x`` is the whole frame (``Styler.apply`` with ``axis=None``). The
    values of the module-level ``df2`` are taken position by position and
    given ``x``'s index and columns, so the result lines up with the styled
    frame even when it does not use a default integer index.
    """
    return pd.DataFrame(df2.values, index=x.index, columns=x.columns)
df.style.apply(highlight, axis=None)
Thanks in advance!

How to use tqdm with map for Dataframes

Can I use tqdm progress bar with map function to loop through dataframe/series rows?
specifically, for the following case:
def example(x):
    """Return ``x`` increased by 2."""
    return x + 2
if __name__ == '__main__':
    dframe = pd.DataFrame([{'a': 1, 'b': 1}, {'a': 2, 'b': 2}, {'a': 3, 'b': 3}])
    # Apply example to every value of column 'b'.
    dframe['b'] = dframe['b'].map(example)
Due to the integration of tqdm with pandas you can use progress_map function instead of map function.
Note: for this to work you should add tqdm.pandas() line to your code.
So try this:
from tqdm import tqdm
def example(x):
    """Return ``x`` increased by 2."""
    return x + 2
tqdm.pandas() # <- added this line
if __name__ == '__main__':
    dframe = pd.DataFrame([{'a': 1, 'b': 1}, {'a': 2, 'b': 2}, {'a': 3, 'b': 3}])
    # Same as Series.map, but reports progress through tqdm.
    dframe['b'] = dframe['b'].progress_map(example)  # <- progress_map here
Here is the documentation reference:
(after adding tqdm.pandas()) ... you can use progress_apply instead of apply and progress_map
instead of map

Categories

Resources