Combine entries in a single legend from Plotly subplots - python - python

The following plots two separate scatterplots using Plotly. I want to combine the points from each subplot into a single legend. However, if I plot the figure as is, there are some duplicate entries. On the other hand, if I hide a legend from a certain subplot, not all entries are displayed.
df = pd.DataFrame({'Type' : ['1','1','1','1','1','2','2','2','2','2'],
'Category' : ['A','D','D','D','F','B','D','A','D','E']
})
df['Color'] = df['Category'].map(dict(zip(df['Category'].unique(),
px.colors.qualitative.Dark24[:len(df['Category'].unique())])))
df = pd.concat([df]*10, ignore_index = True)
df['Lat'] = np.random.randint(0, 20, 100)
df['Lon'] = np.random.randint(0, 20, 100)
Color = df['Color'].unique()
Category = df['Category'].unique()
cats = dict(zip(Color, Category))
df_type_1 = df[df['Type'] == '1'].copy()
df_type_2 = df[df['Type'] == '2'].copy()
fig = make_subplots(
rows = 1,
cols = 2,
specs = [[{"type": "scattermapbox"}, {"type": "scattermapbox"}]],
vertical_spacing = 0.05,
horizontal_spacing = 0.05
)
for c in df_type_1['Color'].unique():
df_color = df_type_1[df_type_1['Color'] == c]
fig.add_trace(go.Scattermapbox(
lat = df_color['Lat'],
lon = df_color['Lon'],
mode = 'markers',
name = cats[c],
marker = dict(color = c),
opacity = 0.8,
#legendgroup = 'group2',
#showlegend = True,
),
row = 1,
col = 1
)
for c in df_type_2['Color'].unique():
df_color = df_type_2[df_type_2['Color'] == c]
fig.add_trace(go.Scattermapbox(
lat = df_color['Lat'],
lon = df_color['Lon'],
mode = 'markers',
name = cats[c],
marker = dict(color = c),
opacity = 0.8,
#legendgroup = 'group2',
#showlegend = False,
),
row = 1,
col = 2
)
fig.update_layout(height = 600, width = 800, margin = dict(l = 10, r = 10, t = 30, b = 10));
fig.update_layout(mapbox1 = dict(zoom = 2, style = 'carto-positron'),
mapbox2 = dict(zoom = 2, style = 'carto-positron'),
)
fig.show()
output: duplicate entries
if I use showlegend = False on either subplot, then the legend will not show all applicable entries.
output: (subplot 2 showlegend = False)

At the moment, the most reliable way to remove duplicate legend entries is to keep the trace names already seen in a set() and hide the legend entry of every trace whose name is already in that set. I keep this as a snippet, which I took from this answer. I have also changed the code to use the color information stored in the 'Color' column, and restructured it so that all traces are created in a single nested loop without creating an extra data frame.
# Build a 1x2 grid of mapbox subplots and add one trace per
# (Type, Category) pair, then keep only the first legend entry per name.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "scattermapbox"}, {"type": "scattermapbox"}]],
    vertical_spacing=0.05,
    horizontal_spacing=0.05,
)

for t in df['Type'].unique():
    # '@t' makes DataFrame.query read the local Python variable t.
    dff = df.query('Type == @t')
    for c in dff['Category'].unique():
        dffc = dff.query('Category == @c')
        fig.add_trace(
            go.Scattermapbox(
                lat=dffc['Lat'],
                lon=dffc['Lon'],
                mode='markers',
                name=c,
                # Per-point colors come straight from the 'Color' column.
                marker=dict(color=dffc['Color']),
                opacity=0.8,
            ),
            row=1,
            # The Type value is converted to int and used as the subplot column.
            col=int(t),
        )

fig.update_layout(height=600, width=800, margin=dict(l=10, r=10, t=30, b=10))
fig.update_layout(
    mapbox1=dict(zoom=2, style='carto-positron'),
    mapbox2=dict(zoom=2, style='carto-positron'),
)

# Remember every trace name seen so far; a trace whose name was already
# seen gets showlegend=False, so each name appears in the legend only once.
names = set()
fig.for_each_trace(
    lambda trace: trace.update(showlegend=False)
    if (trace.name in names)
    else names.add(trace.name)
)
fig.show()

Related

Python Query in Dataframe

I am trying to select specific countries from my dataframe, but it produces an error when I add an `and` condition inside the query.
Below is my code.
top_country = 15
country_wise_dataframe_temp = country_wise_dataframe.query('Country == "USA" and Country == "Canada"')
fig_confirmed = px.bar(country_wise_dataframe_temp.sort_values('Confirmed').tail(top_country), x = 'Confirmed', y = 'Country',
text = 'Confirmed', orientation = 'h', color_discrete_sequence=[cnf])
fig_deaths = px.bar(country_wise_dataframe.sort_values('Deaths').tail(top_country), x = 'Deaths', y = 'Country',
text = 'Deaths', orientation = 'h', color_discrete_sequence=[dth])
fig_death_x_confirmed = make_subplots(rows = 5, cols = 2, shared_xaxes = False, horizontal_spacing = 0.14,
vertical_spacing = 0.1, subplot_titles=('Confirmed Cases', 'Deaths Reported'))
fig_death_x_confirmed.add_trace(fig_confirmed['data'][0], row = 1, col = 1)
fig_death_x_confirmed.add_trace(fig_deaths['data'][0], row = 1, col = 2)
fig_death_x_confirmed.update_layout(height = 3000)
country_wise_dataframe_temp.show()
Below is the error message.
IndexError Traceback (most recent call last) <ipython-input-388-e5656e4a8eeb> in <module>
11 vertical_spacing = 0.1, subplot_titles=('Confirmed Cases', 'Deaths Reported'))
12
---> 13 fig_death_x_confirmed.add_trace(fig_confirmed['data'][0], row = 1, col = 1)
14 fig_death_x_confirmed.add_trace(fig_deaths['data'][0], row = 1, col = 2)
15
IndexError: tuple index out of range
Basically, I just want to get the data for both the USA and Canada.
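So I got it now: instead of `and`, I should be using `or` (`|`). A row's Country can never be equal to both "USA" and "Canada" at the same time, so the `and` query returns an empty dataframe, the bar chart built from it has no traces, and indexing `fig_confirmed['data'][0]` fails with the IndexError above.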
country_wise_dataframe_temp = country_wise_dataframe.query('Country == "USA" |Country == "Canada"')

add color legend to map subplots

For this example, I modified the dataset and code from https://plotly.com/python/map-subplots-and-small-multiples/ to add a column to plot it in colors.
What I want to do here and in my dataset is to add a legend with a color scale.
Here the range is the same (0-9) among maps, so, a general legend or a legend for each subplot would work. Here the general legend is wrong.
Related: Plotly contour subplots each having their own colorbar
import plotly.graph_objects as go
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/1962_2006_walmart_store_openings.csv')
df.head()
# new column
import numpy as np
df["counts"] = np.random.choice(range(0,10),df.shape[0])
data = []
layout = dict(
title = 'New Walmart Stores per year 1962-2006<br>\
Source: <a href="http://www.econ.umn.edu/~holmes/data/WalMart/index.html">\
University of Minnesota</a>',
# showlegend = False,
autosize = False,
width = 1000,
height = 900,
hovermode = False,
legend = dict(
x=0.7,
y=-0.1,
bgcolor="rgba(255, 255, 255, 0)",
font = dict( size=11 ),
)
)
years = df['YEAR'].unique()
for i in range(len(years)):
geo_key = 'geo'+str(i+1) if i != 0 else 'geo'
lons = list(df[ df['YEAR'] == years[i] ]['LON'])
lats = list(df[ df['YEAR'] == years[i] ]['LAT'])
mycolor = list(df[ df['YEAR'] == years[i] ]['counts']) # new
# Walmart store data
data.append(
dict(
type = 'scattergeo',
showlegend=False,
lon = lons,
lat = lats,
geo = geo_key,
name = int(years[i]),
marker = dict(
color = mycolor, # new
#color = "rgb(0, 0, 255)",
opacity = 0.5
,showscale=True # new
)
)
)
# Year markers
data.append(
dict(
type = 'scattergeo',
showlegend = False,
lon = [-78],
lat = [47],
geo = geo_key,
text = [years[i]],
mode = 'text',
)
)
layout[geo_key] = dict(
scope = 'usa',
showland = True,
landcolor = 'rgb(229, 229, 229)',
showcountries = False,
domain = dict( x = [], y = [] ),
subunitcolor = "rgb(255, 255, 255)",
)
z = 0
COLS = 5
ROWS = 9
for y in reversed(range(ROWS)):
for x in range(COLS):
geo_key = 'geo'+str(z+1) if z != 0 else 'geo'
layout[geo_key]['domain']['x'] = [float(x)/float(COLS), float(x+1)/float(COLS)]
layout[geo_key]['domain']['y'] = [float(y)/float(ROWS), float(y+1)/float(ROWS)]
z=z+1
if z > 42:
break
fig = go.Figure(data=data, layout=layout)
fig.update_layout(width=800)
config = {'staticPlot': True}
fig.show(config=config)
I had to build the coordinates for each colorbar:
import plotly.graph_objects as go
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/1962_2006_walmart_store_openings.csv')
df.head()
z1 = z = 0
COLS = 5
ROWS = 9
mylist = [[],[]]
for y in reversed(range(ROWS)):
for x in range(COLS):
mylist[0].append((float(x+1)/float(COLS))-.02)
mylist[1].append((float(y+1)/float(ROWS))-.05)
z1=z1+1
if z1 > 42:
break
# new column
import numpy as np
df["counts"] = np.random.choice(range(0,10),df.shape[0])
data = []
layout = dict(
title = 'New Walmart Stores per year 1962-2006<br>\
Source: <a href="http://www.econ.umn.edu/~holmes/data/WalMart/index.html">\
University of Minnesota</a>',
# showlegend = False,
autosize = False,
width = 1000,
height = 900,
hovermode = False,
legend = dict(
x=0.7,
y=-0.1,
bgcolor="rgba(255, 255, 255, 0)",
font = dict( size=11 ),
)
)
years = df['YEAR'].unique()
for i in range(len(years)):
geo_key = 'geo'+str(i+1) if i != 0 else 'geo'
lons = list(df[ df['YEAR'] == years[i] ]['LON'])
lats = list(df[ df['YEAR'] == years[i] ]['LAT'])
mycolor = list(df[ df['YEAR'] == years[i] ]['counts']) # new
# Walmart store data
data.append(
dict(
type = 'scattergeo',
showlegend=False,
lon = lons,
lat = lats,
geo = geo_key,
name = int(years[i]),
marker = dict(
color = mycolor, # new
#color = "rgb(0, 0, 255)",
opacity = 0.5,
showscale=True
,colorbar=dict(len=0.1
, x=mylist[0][i]
, y=mylist[1][i]
,thickness=5
)
)
)
)
# Year markers
data.append(
dict(
type = 'scattergeo',
showlegend = False,
lon = [-78],
lat = [47],
geo = geo_key,
text = [years[i]],
mode = 'text',
)
)
layout[geo_key] = dict(
scope = 'usa',
showland = True,
landcolor = 'rgb(229, 229, 229)',
showcountries = False,
domain = dict( x = [], y = [] ),
subunitcolor = "rgb(255, 255, 255)",
)
for y in reversed(range(ROWS)):
for x in range(COLS):
geo_key = 'geo'+str(z+1) if z != 0 else 'geo'
layout[geo_key]['domain']['x'] = [float(x)/float(COLS), float(x+1)/float(COLS)]
layout[geo_key]['domain']['y'] = [float(y)/float(ROWS), float(y+1)/float(ROWS)]
z=z+1
if z > 42:
break
fig = go.Figure(data=data, layout=layout)
fig.update_layout(width=800)
config = {'staticPlot': True}
fig.show(config=config)

Python Table Conditional Formatting

I have written code to create a data table and save it as an image. Here is the code:
df_avg = data_rtm_market.groupby('date').mean()
base = datetime.datetime.today()
date_list = [base - datetime.timedelta(days=x) for x in range(1,8)]
dtr = [x.strftime("%d-%m-%Y") for x in date_list]
df_avg.reset_index(inplace=True)
last_7_day = df_avg[df_avg['date'].isin(dtr)]
data_rtm_market.date = pd.to_datetime(data_rtm_market.date,format="%d-%m-%Y")
dam_market.date = pd.to_datetime(dam_market.date,format="%d-%m-%Y")
final_ = pd.merge(data_rtm_market,dam_market,how='inner', on=['date','block'])
df = final_
df[['total_purchase_bid','total_sell_bid','total_cleared_vol']]=df.groupby('date')['purchase_bid', 'sell_bid', 'cleared_volume'].transform('sum')
df[['max_mcp_rtm', 'max_mcp_dam']]=final_.groupby('date')['mcp_x','mcp_y'].transform('max')
df[['avg_mcp_rtm','avg_mcp_dam']]=final_.groupby('date')['mcp_x','mcp_y'].transform('mean')
df[['min_mcp_rtm','min_mcp_dam']]=final_.groupby('date')['mcp_x','mcp_y'].transform('min')
summary = df[['date','total_purchase_bid',
'total_sell_bid', 'total_cleared_vol', 'max_mcp_rtm', 'max_mcp_dam',
'avg_mcp_rtm', 'avg_mcp_dam', 'min_mcp_rtm', 'min_mcp_dam']]
table = summary.drop_duplicates(keep='first')
table1 = table.tail(8)
table1 = table1.iloc[:-1]
col3 = table1[['total_purchase_bid',
'total_sell_bid', 'total_cleared_vol']].apply(lambda x:round((x/4000),2))
col4 = table1[['max_mcp_rtm', 'max_mcp_dam',
'avg_mcp_rtm', 'avg_mcp_dam', 'min_mcp_rtm', 'min_mcp_dam']].apply(lambda x:round(x,2))
final_temp = pd.concat([table1['date'],col3, col4], axis=1, sort=False)
final_temp['date'] = final_temp['date'].dt.strftime('%d-%m-%Y')
final_temp = final_temp.set_index('date').T
final_temp.reset_index(inplace=True,drop=True)
final_temp.insert(0,'1', ["Volume(MUs)","",""," Price(Rs/kWh)","","Price(Rs/kWh)","","Price(Rs/kWh)",""])
final_temp.insert(1,'2', ["Buy ","Sell ","Cleared","Max RTM","Max DAM","Avg RTM","Avg DAM","Min RTM","Min DAM"])
def render_mpl_table(data, col_width=3.0, row_height=0.825, font_size=26,
                     header_color='#5DADE2', row_colors=['#f1f1f2', 'w'], edge_color='black',
                     bbox=[0, 0, 1, 1], header_columns=0,
                     ax=None, **kwargs):
    """Draw ``data`` as a styled matplotlib table and return its figure.

    Parameters
    ----------
    data : DataFrame-like object exposing ``shape``, ``values`` and
        ``columns``; ``columns`` become the header row.
    col_width, row_height : size multipliers per column / row, used only to
        compute ``figsize`` when a new figure is created (``ax`` is None).
    font_size : font size applied to every cell.
    header_color : face color of header cells.
    row_colors : face colors cycled over body rows by row index
        (never mutated here, so the shared list default is safe).
    edge_color : edge color applied to every cell.
    bbox : bounding box forwarded to ``ax.table``.
    header_columns : cells whose column index is below this value are
        styled like the header row.
    ax : existing axes to draw into; when None a new figure/axes pair is
        created and its axis is switched off.
    **kwargs : forwarded unchanged to ``ax.table``.

    Returns
    -------
    The figure that owns the axes the table was drawn on.
    """
    if ax is None:
        # (columns, rows + 1 header row) scaled by the per-cell size.
        size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
        fig, ax = plt.subplots(figsize=size)
        ax.axis('off')
    else:
        # A caller-supplied axes already belongs to a figure; return that
        # one instead of failing on an unbound ``fig``.
        fig = ax.get_figure()

    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)

    # get_celld() is the public accessor for the {(row, col): cell} mapping.
    for (row, col), cell in mpl_table.get_celld().items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='white')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
    return fig
final_temp.columns.values[0] = ""
final_temp.columns.values[1] = ""
fig = render_mpl_table(final_temp, header_columns=0, col_width=4)
fig.savefig("/content/assets/summary_table.jpeg",bbox_inches='tight')
This is giving me the data table as below-
I want to use conditional formatting in the cells such that, for example if Max RTM is greater than Max DAM, then the cell background or cell text turns green, and if Max RTM is less than Max DAM, the cell background or cell text turns red. Any way of doing it?
Also how can I merge the text in the first column?
You can do the following if you transpose your table:
import numpy as np
import pandas as pd

# Initialize a random 10x2 dataframe with columns 'A' and 'B'
# (numpy is needed for np.random.normal).
df_test = pd.DataFrame(np.random.normal(size=(10, 2)),
                       columns=['A', 'B'])
#Style functions
def compare_columns(data, column1, column2, color1, color2):
    """Return one CSS background style per entry of ``data``.

    Intended for ``Styler.apply(..., axis=1)``, where ``data`` is one row.

    Parameters
    ----------
    data : mapping-like row indexed by column label.
    column1, column2 : labels of the two values to compare.
    color1 : background color used when ``data[column1] > data[column2]``.
    color2 : background color used when ``data[column1] <= data[column2]``.

    Returns
    -------
    list of str with ``len(data)`` entries: the same ``background-color``
    rule for every cell, or empty strings when neither comparison holds
    (e.g. one of the values is NaN), so the row is simply left unstyled.
    """
    first, second = data[column1], data[column2]
    if first > second:
        color = color1
    elif first <= second:
        color = color2
    else:
        # Neither comparison is true (NaN): return "no style" instead of
        # falling through and implicitly returning None.
        return [''] * len(data)
    return [f'background-color: {color}'] * len(data)
df_test.style.apply(compare_columns, column1 = 'A',
column2 = 'B', color1 = 'red',
color2 = 'green', axis=1)
Output:
See answer here (Coloring Cells in Pandas) and pandas docs (https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html) for more details.

Splitting data in a Python dataframe and getting the array values automatically

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('D:\ history/segment.csv')
data = pd.DataFrame(data)
data = data.sort_values(['Prob_score'], ascending=[False])
one = len(data)
actualpaid_overall = len(data.loc[data['paidstatus'] == 1])
data_split = np.array_split(data, 10)
data1 = data_split[0]
actualpaid_ten = len(data1.loc[data1['paidstatus'] == 1])
percent_ten = actualpaid_ten/actualpaid_overall
data2 = data_split[1]
actualpaid_twenty = len(data2.loc[data2['paidstatus'] == 1])
percent_twenty = (actualpaid_twenty/actualpaid_overall) + percent_ten
data3 = data_split[2]
actualpaid_thirty = len(data3.loc[data3['paidstatus'] == 1])
percent_thirty = (actualpaid_thirty/actualpaid_overall) + percent_twenty
data4 = data_split[3]
actualpaid_forty = len(data4.loc[data4['paidstatus'] == 1])
percent_forty = (actualpaid_forty/actualpaid_overall) + percent_thirty
data5 = data_split[4]
actualpaid_fifty = len(data5.loc[data5['paidstatus'] == 1])
percent_fifty = (actualpaid_fifty/actualpaid_overall) + percent_forty
data6 = data_split[5]
actualpaid_sixty = len(data6.loc[data6['paidstatus'] == 1])
percent_sixty = (actualpaid_sixty/actualpaid_overall) + percent_fifty
data7 = data_split[6]
actualpaid_seventy = len(data7.loc[data7['paidstatus'] == 1])
percent_seventy = (actualpaid_seventy/actualpaid_overall) + percent_sixty
data8 = data_split[7]
actualpaid_eighty = len(data8.loc[data8['paidstatus'] == 1])
percent_eighty = (actualpaid_eighty/actualpaid_overall) + percent_seventy
data9 = data_split[8]
actualpaid_ninenty = len(data9.loc[data9['paidstatus'] == 1])
percent_ninenty = (actualpaid_ninenty/actualpaid_overall) + percent_eighty
data10 = data_split[9]
actualpaid_hundred = len(data10.loc[data10['paidstatus'] == 1])
percent_hundred = (actualpaid_hundred/actualpaid_overall) + percent_ninenty
array_x = [10,20,30,40,50,60,70,80,90,100]
array_y = [ percent_ten, percent_twenty, percent_thirty, percent_forty,percent_fifty, percent_sixty, percent_seventy, percent_eighty, percent_ninenty, percent_hundred]
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
The above is my Python code. I have split my dataframe into 10 equal sections and plotted the graph, but I am not satisfied with it and have two concerns:
array_x = [10,20,30,40,50,60,70,80,90,100]: in this line of code I have entered the x values manually. Is there a way to generate them automatically? Since I used split(data,10), it should produce 10 array values.
As we can see, the same code for data1, data2, ..., data10 is repeated again and again. Is there a way to write this as a function or a loop?
Any help with codes will be appreciated. Thanks
I believe you need a list comprehension. For the count, a simpler way is possible: take the sum of the boolean mask (True values are treated as 1). Then convert the lists to numpy arrays and use numpy.cumsum:
# Raw string: '\ ' is not a valid escape sequence, so the plain literal only
# worked by accident; the path value itself is unchanged.
data = pd.read_csv(r'D:\ history/segment.csv')
# Highest scores first, so chunk 0 holds the top-scored rows.
data = data.sort_values('Prob_score', ascending=False)
one = len(data)
# Summing a boolean mask counts its True values.
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
# Rows per chunk, and each chunk's share of all rows with paidstatus == 1.
x = [len(chunk) for chunk in data_split]
y = [(chunk['paidstatus'] == 1).sum() / actualpaid_overall for chunk in data_split]
# Running totals turn the per-chunk values into a cumulative curve.
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x, array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid(which='both', axis='both', linewidth=0.5, color='0.75')
Sample:
np.random.seed(2019)
N = 1000
data = pd.DataFrame({'paidstatus':np.random.randint(3, size=N),
'Prob_score':np.random.randint(100, size=N)})
#print (data)
data = data.sort_values(['Prob_score'], ascending=[False])
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
print (array_x)
[ 100 200 300 400 500 600 700 800 900 1000]
print (array_y)
[0.09118541 0.18844985 0.27963526 0.38601824 0.49848024 0.61702128
0.72036474 0.81155015 0.9331307 1. ]

Add multiple text labels from DataFrame columns in Plotly

The goal is to plot some data using plotly where the text param contains multiple columns.
Here is my DataFrame:
import pandas as pd
import numpy as np
import plotly as py
import plotly.graph_objs as go
np.random.seed(1)
df = pd.DataFrame({'Mean Age': np.random.randint(40,60,10),
'Percent': np.random.randint(20,80,10),
'Number Column': np.random.randint(100,500,10)},
index=list('ABCDEFGHIJ'))
df.index.name = 'Text Column'
df = df.sort_values('Mean Age')
Here is an example of how I plotted the data with text from one column to show on hover:
# trace for Percent
trace0 = go.Scatter(
x = df.index,
y = df['Percent'],
name = 'Percent',
text = df['Mean Age'], # text to show on hover from df column
mode = 'lines+markers',
line = dict(
color = ('rgb(0,0,255)'), # blue
width = 4)
)
layout = dict(title = 'Test Plot',
xaxis = dict(title = 'Text Column'),
yaxis = dict(title = 'Percent'),
)
data = [trace0]
fig = dict(data=data, layout=layout)
py.offline.plot(fig, filename = 'Test_Plot.html')
I am looking to add another column's data to the text param. I can accomplish this by doing some list comprehensions but is there an easier/more efficient way to do this?
I am looking for an output similar to what is below but in a more efficient way than using list comprehension:
# column values to list
num = list(df['Number Column'])
age = list(df['Mean Age'])
# trace for Percent
trace0 = go.Scatter(
x = df.index,
y = df['Percent'],
name = 'Percent',
# list comprehension to get the data to show
text = [f'Number Column: {x}; Mean Age: {y}' for x,y in list(zip(num, age))],
mode = 'lines+markers',
line = dict(
color = ('rgb(0,0,255)'), # blue
width = 4)
)
layout = dict(title = 'Test Plot',
xaxis = dict(title = 'Text Column'),
yaxis = dict(title = 'Percent'),
)
data = [trace0]
fig = dict(data=data, layout=layout)
py.offline.plot(fig, filename = 'Test_Plot_Output.html')
You could also do something along the lines of the following:
trace0 = go.Scatter(
x = df.index,
y = df['Percent'],
name = 'Percent',
# string concatenation in pandas
# also the <br> puts the data on a new line in the hover text
text = "Number Column: " + df["Number Column"].astype(str) + "<br>Mean Age: " + df["Mean Age"].astype(str),
mode = 'lines+markers',
line = dict(
color = ('rgb(0,0,255)'), # blue
width = 4)
)

Categories

Resources