I would like to display the following DataFrame as a bar chart with two y-axes: the area columns should use the left axis and the price columns the right axis:
area1 area2 price1 price2
level
first 263.16 906.58 10443.32 35101.88
second 6879.83 14343.03 2077.79 4415.53
third 31942.75 60864.24 922.87 1774.47
I tried the code below. It works, but it only displays the left-hand axis.
import matplotlib.pyplot as plt
# Draw every column of df as grouped bars against one (left-hand) y-axis.
df.plot.bar()

# No x-axis title; tick labels rotated 45 degrees and rendered in SimHei.
plt.xlabel("")
plt.xticks(rotation=45, fontproperties="SimHei")
plt.legend()
Thank you.
If I understood you correctly, one way could be this, but you have to "play" a bit with the values of width and position of the ticks:
import pandas as pd
import matplotlib.pyplot as plt
# Two y-axes sharing one x-axis: areas on the left, prices on the right.
fig, ax = plt.subplots(figsize=(12, 5))
ax2 = ax.twinx()

width = 0.1

# (column, colour, target axes). The list index is passed as the pandas
# `position` argument so that each series gets its own bar alignment.
series_layout = [
    ('area1', 'red', ax),
    ('area2', 'orange', ax),
    ('price1', 'blue', ax2),
    ('price2', 'green', ax2),
]
for position, (column, colour, target) in enumerate(series_layout):
    df[column].plot(kind='bar', color=colour, ax=target, width=width, position=position)

ax.set_ylabel('Area')
ax2.set_ylabel('Price')

# One legend per axes, placed side by side along the top edge.
ax.legend(["Area1", "Area2"], bbox_to_anchor=(0.8, 1.0))
ax2.legend(["Price1", "Price2"], bbox_to_anchor=(0.9, 1.0))

plt.show()
Another way is this:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Two y-axes sharing one x-axis: areas on the left, prices on the right.
fig, ax = plt.subplots(figsize=(10, 5))
ax2 = ax.twinx()
# ax.set_xticklabels(ax.get_xticklabels(),rotation=45) # Rotation 45 degrees

width = 0.1
ind = np.arange(len(df))  # one integer slot per row of df

ax.set_xlabel('Level')
ax.set_ylabel('Area')
ax2.set_ylabel('Price')

# (target axes, column, colour); bar k of each group is shifted right by
# k * width so the four bars sit next to each other.
bar_specs = [
    (ax, 'area1', 'red'),
    (ax, 'area2', 'orange'),
    (ax2, 'price1', 'blue'),
    (ax2, 'price2', 'green'),
]
for slot, (target, column, colour) in enumerate(bar_specs):
    target.bar(ind + slot * width, df[column], width, color=colour, label=column)

# Put each tick under the middle of its four-bar group and label it with
# the corresponding index value of df.
ax.set_xticks(ind + 1.5 * width)
ax.set_xticklabels(df.index)
ax.set_xlim([2 * width - 1, len(df)])

ax.legend(["Area1", "Area2"], bbox_to_anchor=(1, 1))
ax2.legend(["Price1", "Price2"], bbox_to_anchor=(1, 0.87))

plt.show()
Related
I have 5 datasets, each with thousands of x and y coordinates grouped by 'frame', that produce 5 trajectory plots. I'd like to mark the first and last coordinates on each plot, but I am having difficulty figuring out how. I am using Jupyter Notebook.
# Mean of each grouped dataset (gr1..gr5 are presumably the per-'frame'
# groupby objects described in the question -- confirm against the caller).
mean_pos1, mean_pos2, mean_pos3, mean_pos4, mean_pos5 = (
    grouped.mean() for grouped in (gr1, gr2, gr3, gr4, gr5)
)

plt.figure()
xlim = (200, 1500)
ylim = (0, 1200)

# (data, line colour, legend label) for every trajectory.
trajectories = [
    (mean_pos1, 'blue', 'Dolphin A'),
    (mean_pos2, 'red', 'Dolphin B'),
    (mean_pos3, 'green', 'Dolphin C'),
    (mean_pos4, 'magenta', 'Dolphin D'),
    (mean_pos5, 'cyan', 'Dolphin E'),
]

# No `ax=` is passed, so each .plot() call returns its own axes object.
axes = []
for mean_pos, colour, name in trajectories:
    axis = mean_pos.plot(x='x', y='y', color=colour, label=name)
    axis.set_title('mean trajectory')
    axes.append(axis)
ax1, ax2, ax3, ax4, ax5 = axes

# Same viewing window on every plot so the trajectories are comparable.
for axis in axes:
    axis.set_xlim(xlim)
    axis.set_ylim(ylim)

plt.show()
the output of them looks like this:
Use the scatter method to plot the markers separately on the same axis by grabbing the first and last elements from your x and y series:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Ten random (x, y) points standing in for a trajectory.
df = pd.DataFrame({
    'x': np.random.normal(3, 0.2, 10),
    'y': np.random.normal(5, 0.3, 10),
})

fig, ax = plt.subplots()
df.plot(x='x', y='y', ax=ax)

# Overlay a red marker on the first (0) and last (-1) point of the line.
for endpoint in (0, -1):
    point = df.iloc[endpoint]
    ax.scatter(point['x'], point['y'], marker='o', color='red')

plt.show()
I have a problem putting a radar chart and a bar graph into subplots in Python.
I defined a grid of 1 row and 2 columns so that each chart goes into its own slot.
I tried to get this working but couldn't.
How can I do that?
Here is my radar function shown below.
def radar_chart(values=None):
    """Draw one radar (polar) chart comparing the named aircraft.

    Every name in *values* is looked up in the module-level
    ``ww2aircraft_df`` and its Crew / Length / Wingspan / Height / WingArea
    figures are drawn as one filled outline on a new 6x6 inch figure.

    Args:
        values: Iterable of entries of the ``Name`` column.  ``None`` (the
            default) or an empty iterable produces an empty chart.

    Raises:
        IndexError: If a name does not occur in ``ww2aircraft_df``.
    """
    # A mutable default ([]) would be shared between calls; use None instead.
    # `is None` is used because *values* may be a pandas Series, whose truth
    # value is ambiguous.
    names = [] if values is None else values

    labels = np.array(['Crew', 'Length', 'Wingspan', 'Height', 'WingArea'])
    # One spoke per label, evenly spread around the circle.
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False)

    fig = plt.figure(figsize=(6, 6))

    # Create the polar axes exactly once.  Requesting subplot 111 again on
    # every loop iteration depends on Matplotlib handing back the same axes,
    # and left `ax` undefined when there was nothing to iterate over.
    ax = fig.add_subplot(111, polar=True)
    ax.set_thetagrids(angles * 180 / np.pi, labels)
    ax.grid(True)

    drew_any = False
    for v in names:
        # First row matching this name, restricted to the radar columns.
        stats = np.array(ww2aircraft_df[ww2aircraft_df["Name"] == v][labels])[0]
        ax.plot(angles, stats, 'o-', linewidth=2, label=v)
        ax.fill(angles, stats, alpha=0.25)
        drew_any = True

    # A legend without entries only triggers a warning, so skip it.
    if drew_any:
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10),
                  fancybox=True, shadow=True, ncol=1, fontsize=13)
Here is my code snippet:
# One row, two columns: the radar chart is meant for the left slot and the
# bar chart for the right one.
f, a = plt.subplots(nrows=1, ncols=2, figsize=(24, 10))

# NOTE: radar_chart() calls plt.figure() itself, so it draws into a figure
# of its own rather than into a[0].
radar_chart(values=ww2aircraft_df_top_5["Name"])

# Horizontal bars of MaxSpeed per aircraft in the right-hand slot.
graph_1 = sns.barplot(x="MaxSpeed", y="Name", data=ww2aircraft_df_top_5, ax=a[1])
show_values_on_bars(graph_1, "h", 0.3)

title_style = dict(fontsize=20, fontweight="semibold")
plt.suptitle('Top 5 fastest of WW2 warplane by their features', **title_style)

plt.tight_layout()
plt.savefig('images/image10.png', bbox_inches="tight")
plt.show()
Possible solution is the following:
The dataset can be found HERE
# pip install matplotlib
# pip install pandas
# pip install seaborn
import csv
import pandas as pd
import numpy as np
from math import pi
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
# Load the aircraft table; the file is semicolon-separated.
df = pd.read_csv('ww2aircraft.csv', sep=';')

# Keep the five rows with the highest MaxSpeed, renumbered from 0.
df_top5_maxspeed = df.nlargest(5, 'MaxSpeed').reset_index(drop=True)

# Cast the two length measurements to float64.
for column in ('Length', 'Wingspan'):
    df_top5_maxspeed[column] = df_top5_maxspeed[column].astype('float64')

# Restrict the frame to the columns used by the charts.
chart_columns = ["Name", "Crew", "Length", "Wingspan", "Height", "WingArea", "MaxSpeed"]
df_top5_maxspeed_data = df_top5_maxspeed[chart_columns]
df_top5_maxspeed_data
def create_radar_chart(df):
    """Draw a radar chart with one outline per row of *df*.

    The last column of *df* is left out of the radar; every remaining
    column except ``Name`` becomes one spoke.  The polar axes are added to
    the module-level ``fig`` at grid cell ``gs[0, 0]``, so both ``fig`` and
    ``gs`` must exist before this function is called.

    Args:
        df: DataFrame with a ``Name`` column, numeric feature columns and a
            trailing column that is not plotted here.
    """
    # Drop the trailing column (MaxSpeed in the data prepared above).
    df = df.iloc[:, :-1]

    # Take the spokes from the argument itself rather than from the global
    # frame, so the function works for whatever frame the caller passes in.
    categories = [column for column in df.columns if column != 'Name']
    N = len(categories)

    # Evenly spaced spoke angles; the first one is repeated at the end so
    # that each outline closes on itself.
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    ax = fig.add_subplot(gs[0, 0], polar=True)
    # Rotate the zero angle by pi/2 and flip the theta direction.
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    # Address the axes directly instead of going through pyplot's
    # "current axes" state.
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories, size=10)

    ax.set_rlabel_position(0)
    ax.set_yticks([10, 20, 30, 40])
    ax.set_yticklabels(["10", "20", "30", "40"], color="grey", size=10)
    ax.set_ylim(0, 40)

    # iterrows() walks the rows by position, so the frame does not need a
    # 0..n-1 integer index.
    for _, record in df.iterrows():
        values = record.drop('Name').tolist()
        values += values[:1]
        ax.plot(angles, values, 'o-', linewidth=2, label=record["Name"])
        ax.fill(angles, values, alpha=0.2)

    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10),
              fancybox=False, shadow=False, ncol=1, fontsize=10, frameon=False)
def create_bar_chart(df):
    """Plot ``MaxSpeed`` per ``Name`` as vertical bars in grid cell ``gs[0, 1]``.

    Uses the module-level ``fig`` and ``gs``, which must already exist.
    """
    ax = fig.add_subplot(gs[0, 1])
    speeds = df[['Name', 'MaxSpeed']]
    speeds.plot(kind='bar', x='Name', y='MaxSpeed', ax=ax, legend=False)
    # Blank out the x-axis title.
    plt.xlabel("")
# Figure with a 1x2 grid of equally wide cells.
fig = plt.figure(figsize=(15, 5))
fig.suptitle('Top 5 fastest of WW2 warplane by their features', fontsize=16)
gs = GridSpec(1, 2, width_ratios=[1, 1], wspace=0.1)

# Both helpers read the module-level `fig` and `gs` defined just above:
# radar chart first (left cell), then the bar chart (right cell).
for draw_chart in (create_radar_chart, create_bar_chart):
    draw_chart(df_top5_maxspeed_data)

# Leave room between the title and the charts.
plt.subplots_adjust(top=0.85)
When I run the following lines, I get a plot with a large space at the top and the bottom with no bars.
How can I remove this extra space?
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
# Seed NumPy's generator: the data below is drawn from np.random, which the
# stdlib random.seed() does not affect.
np.random.seed(1)

df = pd.DataFrame(np.random.randn(50, 1), columns=["parameter"])
# Standard error taken as half the absolute value of the parameter.
df["standard_error"] = df.parameter.abs() / 2

name = "plot"
x = ["A" + str(label) for label in df.index.tolist()]
y1 = df.parameter
yerr1 = df.standard_error

fig, ax = plt.subplots()
# One inch of figure height per category.
fig.set_figheight(len(x))
plt.rc('axes', labelsize=22)

# Pass the on/off flag positionally: the keyword was called `b` in older
# Matplotlib releases and `visible` in newer ones, so naming it ties the
# script to one of the two.
plt.grid(True, which='major', color='#666666', linestyle='-', alpha=0.2)

trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData
trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData

# Horizontal error bars: parameter value on x, category label on y.
er1 = ax.errorbar(y1, x, xerr=yerr1, marker="o", linestyle="none", transform=trans1)
ax.axvline(x=0, color="black")

plt.savefig(name + '.png', bbox_inches='tight')
If you mean the extra space below and above your smallest and largest data points along the y-axis then you can simply use plt.ylim, e.g:
# Set the extent of the current axes' y-axis explicitly to 0-50.
plt.ylim(0, 50)
Which will change the extent of the y-axis to the range 0 - 50. Similarly for the x-axis there's plt.xlim
I have a seaborn.heatmap plotted from a DataFrame:
import seaborn as sns
import matplotlib.pyplot as plt
# New figure with a white face colour and a black edge colour.
fig = plt.figure(edgecolor='k', facecolor='w')

# Annotated heatmap of the frame on the 'Blues' colour map, colour scale
# capped at 1.0, no colour bar, values printed with 4 significant digits.
sns.heatmap(
    collected_data_frame,
    cmap='Blues',
    vmax=1.0,
    annot=True,
    fmt='.4g',
    cbar=False,
)
I would like to create some sort of highlight for a maximum value in each column - it could be a red box around that value, or a red dot plotted next to that value, or the cell could be colored red instead of using Blues. Ideally I'm expecting something like this:
I got the highlight working for DataFrame printing in Jupyter Notebook using tips from this answer:
How can I achieve a similar thing but on a heatmap?
I customized the heatmap example from the official reference, drawing on other answers from this site. The approach is to add a patch on top of the existing plot. Here I added a frame around the maximum value, but its position is set manually.
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
sns.set()

# Load the example flights dataset (long-form) and pivot it into a
# month x year table of passenger counts.
flights_long = sns.load_dataset("flights")
# Name the pivot arguments: pandas 2.x no longer accepts them positionally.
flights = flights_long.pivot(index="month", columns="year", values="passengers")

# Draw a heatmap with the numeric values in each cell
f, ax = plt.subplots(figsize=(9, 6))
ax = sns.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax)

# Manually placed frame: 2x2 cells with its corner at column 10, row 6.
ax.add_patch(Rectangle((10, 6), 2, 2, fill=False, edgecolor='blue', lw=3))
max value:
# Column (year) that contains the overall maximum.  max(flights) would only
# return the largest column *label*, because iterating a DataFrame yields
# its column labels, not its values.
ymax = flights.max().idxmax()
ymax  # 1960

# Horizontal position of that column in the heatmap.
ypos = flights.columns.get_loc(ymax)
ypos  # 11

# Row (month) of the maximum within that column, and its vertical position.
xmax = flights[ymax].idxmax()
xmax  # 'July'
xpos = flights.index.get_loc(xmax)
xpos  # 6

# Cells are addressed by position as (column, row); passing the label 1960
# as the x coordinate would put the frame far outside the plot.
ax.add_patch(Rectangle((ypos, xpos), 1, 1, fill=False, edgecolor='blue', lw=3))
Complete solution based on the answer of @r-beginners:
Generate DataFrame:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn
# Sample data: five rows (C1..C5) by twelve columns ("0".."11").
arr = np.array([
    [0.9336719, 0.90119269, 0.90791181, 0.3112451, 0.56715989, 0.83339874,
     0.14571595, 0.36505745, 0.89847367, 0.95317909, 0.16396293, 0.63463356],
    [0.93282304, 0.90605976, 0.91276066, 0.30288519, 0.56366228, 0.83032344,
     0.14633036, 0.36081791, 0.9041638, 0.95268572, 0.16803188, 0.63459491],
    [0.15215358, 0.4311569, 0.32324376, 0.51620611, 0.69872915, 0.08811177,
     0.80087247, 0.234593, 0.47973905, 0.21688613, 0.2738223, 0.38322856],
    [0.90406056, 0.89632902, 0.92220635, 0.3022458, 0.58843012, 0.78159595,
     0.17089609, 0.33443782, 0.89997103, 0.93128579, 0.15942313, 0.62644379],
    [0.93868063, 0.45617598, 0.17708323, 0.81828266, 0.72986428, 0.82543775,
     0.41530088, 0.2604382, 0.33132295, 0.94686745, 0.05607774, 0.54141198],
])

# String labels for both axes of the frame.
columns_text = list(map(str, range(12)))
index_text = ['C' + str(n) for n in range(1, 6)]

arr_data_frame = pd.DataFrame(data=arr, index=index_text, columns=columns_text)
Highlighting maximum in a column:
fig, ax = plt.subplots(figsize=(15, 3), facecolor='w', edgecolor='k')
ax = seaborn.heatmap(
    arr_data_frame, annot=True, vmax=1.0, vmin=0,
    cmap='Blues', cbar=False, fmt='.4g', ax=ax,
)

# Row label of the largest entry in every column.
column_max = arr_data_frame.idxmax(axis=0)

# Frame the winning cell of each column.  Cells are addressed by position:
# x is the column number, y the row number of the winning row label.
for col, winning_row in enumerate(column_max[columns_text]):
    position = arr_data_frame.index.get_loc(winning_row)
    ax.add_patch(Rectangle((col, position), 1, 1, fill=False, edgecolor='red', lw=3))

plt.savefig('max_column_heatmap.png', dpi=500, bbox_inches='tight')
Highlighting maximum in a row:
fig, ax = plt.subplots(figsize=(15, 3), facecolor='w', edgecolor='k')
ax = seaborn.heatmap(
    arr_data_frame, annot=True, vmax=1.0, vmin=0,
    cmap='Blues', cbar=False, fmt='.4g', ax=ax,
)

# Column label of the largest entry in every row.
row_max = arr_data_frame.idxmax(axis=1)

# Frame the winning cell of each row.  Cells are addressed by position:
# x is the column number of the winning column label, y the row number.
for row, winning_col in enumerate(row_max[index_text]):
    position = arr_data_frame.columns.get_loc(winning_col)
    ax.add_patch(Rectangle((position, row), 1, 1, fill=False, edgecolor='red', lw=3))

plt.savefig('max_row_heatmap.png', dpi=500, bbox_inches='tight')
import matplotlib.pyplot as plt
import seaborn as sns
# Mean of every column per age group, keeping the columns from position 8
# onwards (presumably the six film-ranking columns -- confirm against the
# layout of star_wars).
rankings_by_age = star_wars.groupby("Age").agg(np.mean).iloc[:, 8:]

# One array of mean rankings per age group, in the order of the grouped index.
age_first, age_second, age_third, age_fourth = (
    rankings_by_age.iloc[row, :].values for row in range(4)
)

fig, ax = plt.subplots(figsize=(12, 9))
ind = np.arange(6)  # one slot per film
width = 0.2

# RGB colour of each age group's bars.
group_colours = [
    (114/255, 158/255, 206/255),
    (255/255, 158/255, 74/255),
    (103/255, 191/255, 92/255),
    (237/255, 102/255, 93/255),
]
group_values = [age_first, age_second, age_third, age_fourth]

# Bar k of every group is shifted right by k * width so that the four age
# groups sit side by side for each film.
bar_groups = [
    ax.bar(ind + slot * width, heights, width, color=colour, alpha=.8)
    for slot, (heights, colour) in enumerate(zip(group_values, group_colours))
]
rects_1, rects_2, rects_3, rects_4 = bar_groups

ax.set_title("Star Wars Film Rankings by Age")
ax.set_ylabel("Ranking")
ax.set_xticks(ind)
ax.set_xticklabels(titles, rotation=45)

# Use booleans: current Matplotlib does not interpret the string 'off', and
# a non-empty string is truthy, so 'off' would leave the ticks switched on.
ax.tick_params(top=False, right=False, left=False, bottom=False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# One legend entry per age group, using the first bar of each as its handle.
ax.legend(
    (rects_1[0], rects_2[0], rects_3[0], rects_4[0]),
    ('18-29', '30-44', '45-60', '> 60'),
    title="Age",
)
plt.show()
I want to replicate this plot using seaborn, but I am not sure how to go about plotting multiple bars for each category. I understand how to do it using one age group at a time, but getting more than one bar per age group seems tricky. Any help would be appreciated.
Quoting the seaborn bar plot documentation, you can use the hue argument to determine which column of the dataframe the bars should be grouped by.
# seaborn.apionly was deprecated in seaborn 0.8 and later removed; the plain
# package import is its replacement.
import seaborn as sns
import matplotlib.pyplot as plt

# Example dataset shipped with seaborn.
df = sns.load_dataset("tips")

# `hue` splits every x category ("day") into one bar per value of "sex".
ax = sns.barplot(data=df, x="day", y="total_bill", hue="sex")
plt.show()