How do you convert the number 1.425887B to 1.4B in a Plotly choropleth?
data2022 = dict(type = 'choropleth',
colorscale = 'agsunset',
reversescale = True,
locations = df['Country/Territory'],
locationmode = 'country names',
z = df['2022 Population'],
text = df['CCA3' ],
marker = dict(line = dict(color = 'rgb(12, 12, 12)', width=1)),
colorbar = {'title': 'Population'})
layout2022 = dict(title = '<b>World Population 2022<b>',
geo = dict(showframe = True,
showland = True, landcolor = 'rgb(198, 197, 198)',
showlakes = True, lakecolor = 'rgb(85, 173, 240)',
showrivers = True, rivercolor = 'rgb(173, 216, 230)',
showocean = True, oceancolor = 'rgb(173, 216, 230)',
projection = {'type': 'natural earth'}))
choromap2022 = go.Figure(data=[data2022], layout=layout2022)
choromap2022.update_geos(lataxis_showgrid = True, lonaxis_showgrid = True)
choromap2022.update_layout(height = 600,
title_x = 0.5,
title_font_color = 'red',
title_font_family = 'Times New Roman',
title_font_size = 30,
margin=dict(t=80, r=50, l=50))
iplot(choromap2022)
This is an image of the result I got. I want to display the population of China as 1.4B instead of 1.425887B.
I tried looking this up in the Plotly documentation but could not find anything.
This is the output of df.head().to_dict()
'CCA3': {0: 'AFG', 1: 'ALB', 2: 'DZA', 3: 'ASM', 4: 'AND'},
'Country/Territory': {0: 'Afghanistan',
1: 'Albania',
2: 'Algeria',
3: 'American Samoa',
4: 'Andorra'},
'Capital': {0: 'Kabul',
1: 'Tirana',
2: 'Algiers',
3: 'Pago Pago',
4: 'Andorra la Vella'},
'Continent': {0: 'Asia', 1: 'Europe', 2: 'Africa', 3: 'Oceania', 4: 'Europe'},
'2022 Population': {0: 41128771, 1: 2842321, 2: 44903225, 3: 44273, 4: 79824},
'2020 Population': {0: 38972230, 1: 2866849, 2: 43451666, 3: 46189, 4: 77700},
'2015 Population': {0: 33753499, 1: 2882481, 2: 39543154, 3: 51368, 4: 71746},
'2010 Population': {0: 28189672, 1: 2913399, 2: 35856344, 3: 54849, 4: 71519},
'2000 Population': {0: 19542982, 1: 3182021, 2: 30774621, 3: 58230, 4: 66097},
'1990 Population': {0: 10694796, 1: 3295066, 2: 25518074, 3: 47818, 4: 53569},
'1980 Population': {0: 12486631, 1: 2941651, 2: 18739378, 3: 32886, 4: 35611},
'1970 Population': {0: 10752971, 1: 2324731, 2: 13795915, 3: 27075, 4: 19860},
'Area (km²)': {0: 652230, 1: 28748, 2: 2381741, 3: 199, 4: 468},
'Density (per km²)': {0: 63.0587,
1: 98.8702,
2: 18.8531,
3: 222.4774,
4: 170.5641},
'Growth Rate': {0: 1.0257, 1: 0.9957, 2: 1.0164, 3: 0.9831, 4: 1.01},
'World Population Percentage': {0: 0.52, 1: 0.04, 2: 0.56, 3: 0.0, 4: 0.0}}```
This is trickier than it appears because plotly uses d3-format, but I believe they are using additional metric abbreviations in their formatting to have the default display numbers larger than 1000 in the format 1.425887B.
My original idea was to round to the nearest 2 digits in the hovertemplate with something like:
data2022 = dict(..., hovertemplate = "%{z:.2r}<br>%{text}<extra></extra>")
However, this removes the default metric abbreviation and causes the entire long-form number to display: the population of China would show up as 1400000000 instead of 1.4B.
So one possible workaround is to create a new column in your DataFrame called "2022 Population Text" and format the number using a custom function that rounds and abbreviates it (credit goes to @rtaft for their function, which does exactly that). Then you can pass this column to customdata, and display customdata in your hovertemplate (instead of z).
import pandas as pd
import plotly.graph_objects as go
data = {'CCA3': {0: 'AFG', 1: 'ALB', 2: 'DZA', 3: 'ASM', 4: 'AND'},
'Country/Territory': {0: 'Afghanistan',
1: 'Albania',
2: 'Algeria',
3: 'American Samoa',
4: 'Andorra'},
'Capital': {0: 'Kabul',
1: 'Tirana',
2: 'Algiers',
3: 'Pago Pago',
4: 'Andorra la Vella'},
'Continent': {0: 'Asia', 1: 'Europe', 2: 'Africa', 3: 'Oceania', 4: 'Europe'},
'2022 Population': {0: 1412000000, 1: 2842321, 2: 44903225, 3: 44273, 4: 79824},
'2020 Population': {0: 38972230, 1: 2866849, 2: 43451666, 3: 46189, 4: 77700},
'2015 Population': {0: 33753499, 1: 2882481, 2: 39543154, 3: 51368, 4: 71746},
'2010 Population': {0: 28189672, 1: 2913399, 2: 35856344, 3: 54849, 4: 71519},
'2000 Population': {0: 19542982, 1: 3182021, 2: 30774621, 3: 58230, 4: 66097},
'1990 Population': {0: 10694796, 1: 3295066, 2: 25518074, 3: 47818, 4: 53569},
'1980 Population': {0: 12486631, 1: 2941651, 2: 18739378, 3: 32886, 4: 35611},
'1970 Population': {0: 10752971, 1: 2324731, 2: 13795915, 3: 27075, 4: 19860},
'Area (km²)': {0: 652230, 1: 28748, 2: 2381741, 3: 199, 4: 468},
'Density (per km²)': {0: 63.0587,
1: 98.8702,
2: 18.8531,
3: 222.4774,
4: 170.5641},
'Growth Rate': {0: 1.0257, 1: 0.9957, 2: 1.0164, 3: 0.9831, 4: 1.01},
'World Population Percentage': {0: 0.52, 1: 0.04, 2: 0.56, 3: 0.0, 4: 0.0}
}
## rounds a number to the specified precision, and adds metrics abbreviations
## i.e. 14230000000 --> 14B
## reference: https://stackoverflow.com/a/45846841/5327068
def human_format(num):
    """Round ``num`` to two significant digits and add a metric abbreviation.

    Args:
        num: The number to format (int or float).

    Returns:
        The rounded value as a string with trailing zeros removed, followed
        by '', 'K', 'M', 'B' or 'T' (e.g. ``1412000000`` -> ``'1.4B'``).
        Values beyond the trillions keep the 'T' suffix (e.g. ``1e18`` ->
        ``'1000000T'``) instead of running past the end of the suffix list.
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    # Round first so that e.g. 999999 becomes '1M' rather than '1000K'.
    num = float('{:.2g}'.format(num))
    magnitude = 0
    # Stop at the largest known suffix; without the bound, values >= 1e15
    # would index past the end of ``suffixes`` and raise IndexError.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    # '{:f}' always emits a decimal point, so stripping zeros then the point
    # never eats significant integer digits.
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])
# Build the 2022 population choropleth. The hover box shows a pre-formatted,
# abbreviated population string (e.g. '1.4B') carried in ``customdata``.
df = pd.DataFrame(data=data)
# human_format is passed directly; wrapping it in a lambda adds nothing.
df['2022 Population Text'] = df['2022 Population'].apply(human_format)

data2022 = dict(
    type='choropleth',
    colorscale='agsunset',
    reversescale=True,
    locations=df['Country/Territory'],
    locationmode='country names',
    z=df['2022 Population'],
    text=df['CCA3'],
    # The formatted strings are displayed by the hovertemplate below in place of z.
    customdata=df['2022 Population Text'],
    marker=dict(line=dict(color='rgb(12, 12, 12)', width=1)),
    colorbar={'title': 'Population'},
    hovertemplate="%{customdata}<br>%{text}<extra></extra>",
)

layout2022 = dict(
    # The bold tag is now closed properly (the original ended with a second '<b>').
    title='<b>World Population 2022</b>',
    geo=dict(
        showframe=True,
        showland=True, landcolor='rgb(198, 197, 198)',
        showlakes=True, lakecolor='rgb(85, 173, 240)',
        showrivers=True, rivercolor='rgb(173, 216, 230)',
        showocean=True, oceancolor='rgb(173, 216, 230)',
        projection={'type': 'natural earth'},
    ),
)

choromap2022 = go.Figure(data=[data2022], layout=layout2022)
choromap2022.update_geos(lataxis_showgrid=True, lonaxis_showgrid=True)
choromap2022.update_layout(
    height=600,
    title_x=0.5,
    title_font_color='red',
    title_font_family='Times New Roman',
    title_font_size=30,
    margin=dict(t=80, r=50, l=50),
)
choromap2022.show()
Note: Since China wasn't included in your sample data, I changed the population of AFG to 1412000000 to test that the hovertemplate would display it as '1.4B'.
Related
I currently have a stacked bar chart for brewers. There are 6 brewers. It is good to understand the volume but I want to highlight in my analysis that some of the brewers are being used more than others. To do so I need to turn my bar chart in to 100% stacked bar.
What it currently looks like
I want it so that each of these bars y-axis is always 100.
The code I have at the moment is:
def brewer_number_bar(location):
    """Build a stacked bar chart of sales counts per brewer.

    ``location`` is handed to ``Series.isin``, so it is a collection of
    location names. Returns a dict with a 'data' list (one ``go.Bar`` per
    menu item) and a 'layout' (``go.Layout`` with stacked bars).
    """
    selected = brewer_df[brewer_df['Location Name'].isin(location)]

    def bar_for(item):
        # Rows of the selected locations that belong to this menu item.
        rows = selected[selected['Menu Item Name'] == item]
        return go.Bar(
            x=rows['Business Date'],
            y=rows['Sales Count'],
            name=item,
        )

    # Items are taken from the unfiltered frame, so an item with no rows in
    # the selected locations still gets an (empty) trace.
    bars = [bar_for(item) for item in brewer_df['Menu Item Name'].unique()]

    layout = go.Layout(
        title='Brewer Volume',
        xaxis={'title': 'Date', 'categoryorder': 'total descending'},
        yaxis={'title': 'Brewer Numbers Used'},
        barmode='stack',
    )
    return {'data': bars, 'layout': layout}
I have tried to take brewer_df_by_brewer['Sales Count'] / brewer_df_by_brewer['Sales Count'].sum() and created a new trace for each but as I also have location in there it has not worked.
Head of the dataframe brewer_df
{'Business Date': {0: Timestamp('2022-09-05 00:00:00'), 1: Timestamp('2022-09-05 00:00:00'), 2: Timestamp('2022-09-05 00:00:00'), 3: Timestamp('2022-09-05 00:00:00'), 4: Timestamp('2022-09-05 00:00:00')}, 'Major Category': {0: 'Brewer Number', 1: 'Brewer Number', 2: 'Brewer Number', 3: 'Brewer Number', 4: 'Brewer Number'}, 'Location Name': {0: 'France', 1: 'France', 2: 'France', 3: 'Germany', 4: 'Germany'}, 'Menu Item Name': {0: '1', 1: '2', 2: '3', 3: '4', 4: '1'}, 'Sales Count': {0: 176, 1: 163, 2: 22, 3: 7, 4: 89}}
You can simplify your function by using plotly.express. This allows you to stack bars by default, pass the category to the x parameter, and also specify which column you want for the color. I've also used a pandas groupby + transform operation that divides each unique location and date in your filtered dataframe by its sum – this is a little cleaner and also more performant than looping through brewer_df['Menu Item Name'].unique().
In order to make sure we are taking dates into account, I've extended your sample dataframe to include more than one day:
Timestamp = pd.Timestamp
brewer_df = pd.DataFrame({
'Business Date': {
0: Timestamp('2022-09-05 00:00:00'),
1: Timestamp('2022-09-05 00:00:00'),
2: Timestamp('2022-09-05 00:00:00'),
3: Timestamp('2022-09-05 00:00:00'),
4: Timestamp('2022-09-05 00:00:00'),
5: Timestamp('2022-09-06 00:00:00'),
6: Timestamp('2022-09-06 00:00:00'),
7: Timestamp('2022-09-06 00:00:00'),
8: Timestamp('2022-09-06 00:00:00'),
9: Timestamp('2022-09-06 00:00:00')
},
'Major Category': {
0: 'Brewer Number',
1: 'Brewer Number',
2: 'Brewer Number',
3: 'Brewer Number',
4: 'Brewer Number',
5: 'Brewer Number',
6: 'Brewer Number',
7: 'Brewer Number',
8: 'Brewer Number',
9: 'Brewer Number'
},
'Location Name':{
0: 'France',
1: 'France',
2: 'France',
3: 'Germany',
4: 'Germany',
5: 'France',
6: 'France',
7: 'France',
8: 'Germany',
9: 'Germany'
},
'Menu Item Name': {
0: '1',
1: '2',
2: '3',
3: '4',
4: '1',
5: '2',
6: '3',
7: '4',
8: '1',
9: '2'
},
'Sales Count': {
0: 176,
1: 163,
2: 22,
3: 7,
4: 89,
5: 90,
6: 6,
7: 14,
8: 22,
9: 200
}
})
Then the modified brewer_number_bar callback looks like the following:
def brewer_number_bar(location):
    """Return a 100% stacked bar figure of brewer usage per business date.

    Args:
        location: A single location name, or a list-like of location names
            (as in the original callback, which filtered with ``isin``).

    Returns:
        The plotly express bar figure. Each bar (business date) sums to
        100% across the menu items of the selected location(s).
    """
    # Accept both a scalar and a collection; a bare string is not list-like.
    locations = location if pd.api.types.is_list_like(location) else [location]
    brewer_df_filtered = brewer_df[brewer_df['Location Name'].isin(locations)].copy()

    # Normalise by the per-date total of the filtered rows. For one location
    # this equals grouping by location and date; for several locations it
    # keeps every bar at 100% instead of 100% per location.
    date_totals = brewer_df_filtered.groupby('Business Date')['Sales Count'].transform('sum')
    brewer_df_filtered['Sales Count Percent'] = brewer_df_filtered['Sales Count'] / date_totals

    fig = px.bar(brewer_df_filtered, x="Business Date", y="Sales Count Percent", color="Menu Item Name")
    fig.update_layout(
        title='Brewer Volume',
        xaxis={'title': 'Date', 'categoryorder': 'total descending'},
        yaxis={'title': 'Brewer Numbers Used', 'tickformat': ',.0%'},
    )
    return fig
Below are two example figs that this callback would return, one when you pass 'France' as the location with fig = brewer_number_bar('France'), and the other when you pass 'Germany' as the location:
In my table from a dataset I need to highlight rows in bold that contain "All" in columns Building, Floor or Teams:
My code :
headerColor = 'darkgrey'
rowEvenColor = 'lightgrey'
rowOddColor = 'white'
fig_occ_fl_team = go.Figure(data=[go.Table(
header=dict(
values=list(final_table_occ_fl_team.columns),
line_color='black',
fill_color=headerColor,
align=['left','left','left','left','left','left','left','left','left','left'],
font=dict(color='black', size=9)
),
cells=dict(
values=[final_table_occ_fl_team['Building'],
final_table_occ_fl_team['Floor'],
final_table_occ_fl_team['Team'],
final_table_occ_fl_team['Number of Desks'],
final_table_occ_fl_team['Avg Occu (#)'],
final_table_occ_fl_team['Avg Occu (%)'],
final_table_occ_fl_team['Avg Occu 10-4 (#)'],
final_table_occ_fl_team['Avg Occu 10-4 (%)'],
final_table_occ_fl_team['Max Occu (#)'],
final_table_occ_fl_team['Max Occu (%)'],
],
line_color='black',
# 2-D list of colors for alternating rows
fill_color = [[rowOddColor,rowEvenColor]*56],
align = ['left','left','left','left','left','left','left','left','left','left'],
font = dict(color = 'black', size = 7)
))
])
fig_occ_fl_team.show()
Dataset head :
data = {'Building': {0: 'All',
1: '1LWP',
2: '1LWP',
3: '1LWP',
4: '1LWP',
5: '1LWP',
6: '1LWP',
7: '1LWP',
8: '1LWP',
9: '1LWP'},
'Floor': {0: 'All',
1: 'All',
2: '2nd',
3: '2nd',
4: '2nd',
5: '2nd',
6: '2nd',
7: '2nd',
8: '2nd',
9: '2nd'},
'Team': {0: 'All',
1: 'All',
2: 'All',
3: 'Anderson/Money',
4: 'Banking & Treasury',
5: 'Charities',
6: 'Client Management',
7: 'Compliance, Legal & Risk',
8: 'DFM',
9: 'Emmerson'},
'Number of Desks': {0: 2297,
1: 2008,
2: 381,
3: 22,
4: 8,
5: 19,
6: 9,
7: 41,
8: 20,
9: 33},
'Avg Occu (#)': {0: 1261,
1: 1126,
2: 195,
3: 14,
4: 4,
5: 9,
6: 5,
7: 21,
8: 13,
9: 18},
'Avg Occu (%)': {0: '55%',
1: '56%',
2: '51%',
3: '64%',
4: '50%',
5: '48%',
6: '56%',
7: '52%',
8: '65%',
9: '55%'},
'Avg Occu 10-4 (#)': {0: 851,
1: 759,
2: 132,
3: 8,
4: 3,
5: 6,
6: 3,
7: 14,
8: 9,
9: 12},
'Avg Occu 10-4 (%)': {0: '37%',
1: '38%',
2: '35%',
3: '37%',
4: '38%',
5: '32%',
6: '34%',
7: '35%',
8: '45%',
9: '37%'},
'Max Occu (#)': {0: 1901,
1: 1680,
2: 274,
3: 22,
4: 6,
5: 13,
6: 7,
7: 27,
8: 17,
9: 25},
'Max Occu (%)': {0: '83%',
1: '84%',
2: '72%',
3: '100%',
4: '75%',
5: '69%',
6: '78%',
7: '66%',
8: '85%',
9: '76%'}}
You can add the bold style to your dataframe prior to creating the table as follows:
import pandas as pd
# Cast to object up front: the numeric columns cannot hold the "<b>...</b>"
# strings written below, and pandas deprecates assigning a string into an
# integer column.
df = pd.DataFrame.from_dict(data).astype(object)
# Index labels of the rows where Building, Floor and Team are all "All".
indices = df.index[(df[["Building", "Floor", "Team"]] == "All").all(axis=1)]
for i in indices:
    for col in df.columns:
        # .at is label-based, matching the labels stored in `indices`
        # (the original mixed these labels with positional .iloc).
        df.at[i, col] = "<b>{}</b>".format(df.at[i, col])
You can now create the table, I increase the size of font to 12:
import plotly.graph_objects as go
headerColor = 'darkgrey'
rowEvenColor = 'lightgrey'
rowOddColor = 'white'

# One fill colour per table row, alternating from the first row. Sizing the
# list from the dataframe replaces the hard-coded repeat count, which only
# covered the first 112 rows.
row_colors = [rowOddColor if i % 2 == 0 else rowEvenColor for i in range(len(df))]

fig_occ_fl_team = go.Figure(data=[go.Table(
    header=dict(
        values=list(df.columns),
        line_color='black',
        fill_color=headerColor,
        # A single value applies to every column.
        align='left',
        font=dict(color='black', size=9),
    ),
    cells=dict(
        # Same column order as the header, so the two cannot drift apart.
        values=[df[col] for col in df.columns],
        line_color='black',
        # 2-D list: the inner list holds the per-row colours shared by all columns.
        fill_color=[row_colors],
        align='left',
        font=dict(color='black', size=12),
    ),
)])
fig_occ_fl_team.show()
Output:
You will notice that the first and fourth columns are bold. If you want to keep the original dataframe unchanged, apply the styling to a copy instead, e.g. df2 = df1.copy().
Here is the code I am using:
df['C'] = np.where((df['spread'] > 60) & (df['volume'] > df['Ma_mult_high']),'green','red')
fig = go.Figure()
# add OHLC trace
fig.add_trace(go.Candlestick(x=df.index,
open=df['open'],
high=df['high'],
low=df['low'],
close=df['close'],
showlegend=False))
# add moving average traces
fig.add_trace(go.Scatter(x=df.index,
y=df['ma'],
opacity=0.7,
line=dict(color='blue', width=2),
name='MA 5'))
fig.add_trace(go.Scatter(
x = df.index,
y = df['close'],
mode = 'markers',
marker_color=df.C
))
fig.update_layout(xaxis_rangeslider_visible=False).show()`
the output
In the image, you can see that it plots both the True and the False signals, maybe because marker_color is set to the "C" column. But if I change that and use only colour names, it plots nothing; even if I change y = df['close'], I get the same problem.
data {'timeStamp': {0: 1657220400000, 1: 1657222200000, 2: 1657224000000, 3: 1657225800000, 4: 1657227600000}, 'open': {0: 21357.7, 1: 21495.84, 2: 21812.46, 3: 21641.56, 4: 21624.03}, 'high': {0: 21499.87, 1: 21837.74, 2: 21838.1, 3: 21659.99, 4: 21727.87}, 'low': {0: 21325.0, 1: 21439.13, 2: 21526.4, 3: 21541.96, 4: 21567.56}, 'close': {0: 21495.83, 1: 21812.47, 2: 21641.56, 3: 21624.03, 4: 21619.57}, 'volume': {0: 3663.2089, 1: 7199.91652, 2: 4367.94336, 3: 1841.10043, 4: 1786.17022}, 'quoteVolume': {0: 78386481.2224664, 1: 155885063.7202956, 2: 94605455.6190078, 3: 39756576.8814698, 4: 38684342.7232105}, 'tradesCount': {0: 59053, 1: 111142, 2: 81136, 3: 56148, 4: 53122}, 'date': {0: Timestamp('2022-07-07 19:00:00'), 1: Timestamp('2022-07-07 19:30:00'), 2: Timestamp('2022-07-07 20:00:00'), 3: Timestamp('2022-07-07 20:30:00'), 4: Timestamp('2022-07-07 21:00:00')}, 'Avg_Volume': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan}, 'Ma_mult_high': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan}, 'Ma_mult_mid': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan}, 'spread': {0: 78.9901069365825, 1: 79.43353152203923, 2: 54.82836060314386, 3: 14.85215623146836, 4: 2.782109662528346}, 'Marker': {0: 21502.87, 1: 21840.74, 2: 21523.4, 3: 21538.96, 4: 21564.56}, 'Symbol': {0: 'triangle-up', 1: 'triangle-up', 2: 'triangle-down', 3: 'triangle-down', 4: 'triangle-down'}, 'ma': {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}, 'C': {0: 'red', 1: 'red', 2: 'red', 3: 'red', 4: 'red'}}
It seems to me that the issue is in your np.where() statement: the nan values in Ma_mult_high make the comparison df['volume'] > df['Ma_mult_high'] evaluate to False, so every row ends up as 'red'.
Try this:
# A marker is green only when the spread exceeds 60 AND the volume exceeds
# the Ma_mult_high threshold; missing thresholds are treated as 0 so they do
# not force the comparison to False.
wide_spread = df['spread'] > 60
high_volume = df['volume'] > df['Ma_mult_high'].fillna(0)
df['C'] = np.where(wide_spread & high_volume, 'green', 'red')
I have a data set that I need to reformat so that I can plot it and work with it further. It is a sort of transpose operation, but I am struggling not to overwrite the data in the new dataframe. I sorted out the headings using dictionaries, and they map the fields from the original df to the new output df correctly. The code just overwrites the first entry instead of adding a new POLY/POLY_NAME.
Input dataframe:
Output dataframe:
Below is my code so far:
import pandas as pd
fractions = {"A": 1.35, "B": 1.40, "C": 1.45}
quality = {"POLY_NAME":"POLY", "AS":"Ash", "CV":"CV","FC":"FC","MS":"Moist","TS":"Tots","VM":"Vols","YL":"Yield"}
frac = list(fractions.values())
headers = list(quality.values())
df = pd.DataFrame(columns=headers, index=frac)
wash_dic = {'POLY_NAME': {0: 'Asset 1', 1: 'Asset 2', 2: 'Asset 3'},
'RD': {0: 1.63, 1: 1.63, 2: 1.57},
'SEAMTH': {0: 3.02, 1: 3.02, 2: 3.37},
'AAS': {0: 7.76, 1: 7.34, 2: 7.24},
'ACV': {0: 28.98, 1: 29.18, 2: 29.27},
'AFC': {0: 54.95, 1: 53.55, 2: 52.38},
'AMS': {0: 4.22, 1: 4.26, 2: 4.63},
'ATS': {0: 0.97, 1: 1.09, 2: 1.23},
'AVM': {0: 33.07, 1: 34.85, 2: 35.75},
'AYL': {0: 0.4, 1: 0.95, 2: 0.75},
'BAS': {0: 9.28, 1: 9.27, 2: 9.58},
'BCV': {0: 28.17, 1: 28.33, 2: 28.09},
'BFC': {0: 56.21, 1: 54.39, 2: 52.11},
'BMS': {0: 4.25, 1: 4.25, 2: 4.61},
'BTS': {0: 0.84, 1: 1.01, 2: 1.22},
'BVM': {0: 30.25, 1: 32.08, 2: 33.7},
'BYL': {0: 3.11, 1: 5.44, 2: 4.36},
'CAS': {0: 11.01, 1: 10.96, 2: 11.25},
'CCV': {0: 27.31, 1: 27.53, 2: 27.39},
'CFC': {0: 58.09, 1: 56.0, 2: 53.43},
'CMS': {0: 4.41, 1: 4.38, 2: 4.62},
'CTS': {0: 0.63, 1: 0.83, 2: 0.98},
'CVM': {0: 26.5, 1: 28.66, 2: 30.71},
'CYL': {0: 13.45, 1: 16.11, 2: 12.94}}
wash = pd.DataFrame(wash_dic)
wash
for label, content in wash.items():
print('fraction:', fractions.get(label[0]), ' quality:', quality.get(label[-2:]))
for c in content:
try:
df.loc[fractions.get(label[0]), quality.get(label[-2:])] = c
except:
pass
I have tried to add another for loop but the logic is escaping me currently.
Required outcome as dictionary
outcome = {'Unnamed: 0': {0: 1.35,
1: 1.4,
2: 1.45,
3: 1.35,
4: 1.4,
5: 1.45,
6: 1.35,
7: 1.4,
8: 1.45},
'POLY': {0: 'Asset 1',
1: 'Asset 1',
2: 'Asset 1',
3: 'Asset 2',
4: 'Asset 2',
5: 'Asset 2',
6: 'Asset 3',
7: 'Asset 3',
8: 'Asset 3'},
'Ash': {0: 7.76,
1: 9.28,
2: 11.01,
3: 7.34,
4: 9.27,
5: 10.96,
6: 7.24,
7: 9.58,
8: 11.25},
'CV': {0: 28.98,
1: 28.17,
2: 27.31,
3: 29.18,
4: 28.33,
5: 27.53,
6: 29.27,
7: 28.09,
8: 27.39},
'FC': {0: 54.95,
1: 56.21,
2: 58.09,
3: 53.55,
4: 54.39,
5: 56.0,
6: 52.38,
7: 52.11,
8: 53.43},
'Moist': {0: 4.22,
1: 4.25,
2: 4.41,
3: 4.26,
4: 4.25,
5: 4.38,
6: 4.63,
7: 4.61,
8: 4.62},
'Tots': {0: 0.97,
1: 0.84,
2: 0.63,
3: 1.09,
4: 1.01,
5: 0.83,
6: 1.23,
7: 1.22,
8: 0.98},
'Vols': {0: 33.07,
1: 30.25,
2: 26.5,
3: 34.85,
4: 32.08,
5: 28.66,
6: 35.75,
7: 33.7,
8: 30.71},
'Yiels': {0: 0.4,
1: 3.11,
2: 13.45,
3: 0.95,
4: 5.44,
5: 16.11,
6: 0.75,
7: 4.36,
8: 12.94}}
Regards
I resolved the duplication/overwriting of the values by first grouping the original wash DF by POLY_NAME. Inside the for loop, the data of each group is written into a blank DF, which is appended to the final DF at the end of each iteration. Just for neatness, I made the index column a normal column and reordered the columns.
# Build one block of rows per POLY_NAME (one row per float fraction) and
# stack the blocks. DataFrame.append was removed in pandas 2.0, so the
# per-asset frames are collected in a list and combined once with pd.concat.
groups = wash.groupby("POLY_NAME")
frames = []
for name, group in groups:
    df = pd.DataFrame(columns=headers)
    for label, content in group.items():
        # Column labels encode <fraction letter>...<two-letter quality code>.
        column = quality.get(label[-2:])
        fraction = fractions.get(label[0])
        # Skip columns that do not map to a known quality/fraction pair.
        # This explicit guard replaces the bare ``except: pass``.
        if column not in headers or fraction is None:
            continue
        for c in content:
            df.loc[fraction, "POLY"] = name
            df.loc[fraction, column] = c
    frames.append(df)
# pd.concat rejects an empty list, so fall back to an empty frame with the headers.
df_final = pd.concat(frames) if frames else pd.DataFrame(columns=headers)
# Turn the fraction index into a regular 'FLOAT' column and order the columns.
df_final = df_final.reset_index().rename({'index':'FLOAT'}, axis = 'columns')
df_final = df_final.reindex(columns=["POLY","FLOAT","Ash","CV","FC","Moist","Tots","Vols","Yield"])
Might not be the neatest or fastest method but it gives the required results.
I have a csv file with data that I have imported into a dataframe.
'RI_df = pd.read_csv("../Week15/police.csv")'
Using .head() my data looks like this:
state stop_date stop_time county_name driver_gender driver_race violation_raw violation search_conducted search_type stop_outcome is_arrested stop_duration drugs_related_stop district
0 RI 2005-01-04 12:55 NaN M White Equipment/Inspection Violation Equipment False NaN Citation False 0-15 Min False Zone X4
1 RI 2005-01-23 23:15 NaN M White Speeding Speeding False NaN Citation False 0-15 Min False Zone K3
2 RI 2005-02-17 04:15 NaN M White Speeding Speeding False NaN Citation False 0-15 Min False Zone X4
3 RI 2005-02-20 17:15 NaN M White Call for Service Other False NaN Arrest Driver
RI_df.head().to_dict()
Out[55]:
{'state': {0: 'RI', 1: 'RI', 2: 'RI', 3: 'RI', 4: 'RI'},
'stop_date': {0: '2005-01-04',
1: '2005-01-23',
2: '2005-02-17',
3: '2005-02-20',
4: '2005-02-24'},
'stop_time': {0: '12:55', 1: '23:15', 2: '04:15', 3: '17:15', 4: '01:20'},
'county_name': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan},
'driver_gender': {0: 'M', 1: 'M', 2: 'M', 3: 'M', 4: 'F'},
'driver_race': {0: 'White', 1: 'White', 2: 'White', 3: 'White', 4: 'White'},
'violation_raw': {0: 'Equipment/Inspection Violation',
1: 'Speeding',
2: 'Speeding',
3: 'Call for Service',
4: 'Speeding'},
'violation': {0: 'Equipment',
1: 'Speeding',
2: 'Speeding',
3: 'Other',
4: 'Speeding'},
'search_conducted': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0},
'search_type': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan},
'stop_outcome': {0: 'Citation',
1: 'Citation',
2: 'Citation',
3: 'Arrest Driver',
4: 'Citation'},
'is_arrested': {0: False, 1: False, 2: False, 3: True, 4: False},
'stop_duration': {0: '0-15 Min',
1: '0-15 Min',
2: '0-15 Min',
3: '16-30 Min',
4: '0-15 Min'},
'drugs_related_stop': {0: False, 1: False, 2: False, 3: False, 4: False},
'district': {0: 'Zone X4',
1: 'Zone K3',
2: 'Zone X4',
3: 'Zone X1',
4: 'Zone X3'}}
RI_df['drugs_related_stop'].value_counts()
Out[27]:
False 90879
True 862
Name: drugs_related_stop, dtype: int64
I am trying to take the true value counts of "drug related stops" and put them on a line graph, in order to see if "drug related stops" have been increasing over time.
ax = RI_df['drugs_related_stop'].value_counts().plot(kind='line',
figsize=(10,8),
title="Drug stops")
ax.set_xlabel("drug stops")
ax.set_ylabel("number of stops")
You should just use groupby().count()
ax = df.groupby('stop_date', as_index=False).count().plot(kind='line',
figsize=(10,8), title="Drug stops", x='stop_date',
y='district')
Here is the complete code so you can double-check:
import pandas as pd
import numpy as np
df = pd.DataFrame({'state': {0: 'RI', 1: 'RI', 2: 'RI', 3: 'RI', 4: 'RI'},
'stop_date': {0: '2005-01-23',
1: '2005-01-23',
2: '2005-02-17',
3: '2005-02-17',
4: '2005-02-24'},
'stop_time': {0: '12:55', 1: '23:15', 2: '04:15', 3: '17:15', 4: '01:20'},
'county_name': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan},
'driver_gender': {0: 'M', 1: 'M', 2: 'M', 3: 'M', 4: 'F'},
'driver_race': {0: 'White', 1: 'White', 2: 'White', 3: 'White', 4: 'White'},
'violation_raw': {0: 'Equipment/Inspection Violation',
1: 'Speeding',
2: 'Speeding',
3: 'Call for Service',
4: 'Speeding'},
'violation': {0: 'Equipment',
1: 'Speeding',
2: 'Speeding',
3: 'Other',
4: 'Speeding'},
'search_conducted': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0},
'search_type': {0: np.nan, 1: np.nan, 2: np.nan, 3: np.nan, 4: np.nan},
'stop_outcome': {0: 'Citation',
1: 'Citation',
2: 'Citation',
3: 'Arrest Driver',
4: 'Citation'},
'is_arrested': {0: False, 1: False, 2: False, 3: True, 4: False},
'stop_duration': {0: '0-15 Min',
1: '0-15 Min',
2: '0-15 Min',
3: '16-30 Min',
4: '0-15 Min'},
'drugs_related_stop': {0: False, 1: False, 2: False, 3: False, 4: False},
'district': {0: 'Zone X4',
1: 'Zone K3',
2: 'Zone X4',
3: 'Zone X1',
4: 'Zone X3'}})
# Group the rows by stop_date, count the non-null values per column, and draw
# the per-date count of the 'district' column as a line chart.
# NOTE(review): this counts every stop recorded on a date, not only the rows
# where drugs_related_stop is True - confirm this matches the "Drug stops" title.
ax = df.groupby('stop_date', as_index=False).count().plot(kind='line',
figsize=(10,8), title="Drug stops", x='stop_date',
y='district')
This is what I'm getting with the code below...
ax = df.groupby('stop_date', as_index=False).count().plot(kind='line',
figsize=(10,8), title="Drug stops", x='stop_date',
y='district')