I have 9 histograms made with matplotlib.pyplot.
Is there an easy way to "stick" them to each other, so that every new histogram would not start with a new row?
Data: data
Providing code:
for column in data:
plt.figure(figsize=(5,5))
a1 = data[(data['Outcome'] == 0)][column]
a2 = data[(data['Outcome'] == 1)][column]
ax = np.linspace(0, data[column].max(), 50)
plt.hist(a1, ax, color='blue', alpha=0.6, label='Have Diabetes = NO')
plt.hist(a2, ax, color='yellow', alpha=0.6, label='Have Diabetes = YES')
plt.title(f'Histogram for {column}')
plt.xlabel(f'{column}')
plt.ylabel('number of people')
plt.grid(True)
leg = plt.legend(loc='upper right', frameon=True)
What I want is something like this:
I actually don't need it to be 3x3, just not go in a column. Is it possible? Thanks for any possible help.
You need to assign the plots to ax , and also it will be set_title etc:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
data = pd.read_csv("datasets_228_482_diabetes.csv")
fig,ax = plt.subplots(3,3,figsize=(9,9))
ax = ax.flatten()
for i,column in enumerate(data.columns):
a1 = data[(data['Outcome'] == 0)][column]
a2 = data[(data['Outcome'] == 1)][column]
ax[i].hist(a1, color='blue', alpha=0.6, label='Have Diabetes = NO')
ax[i].hist(a2, color='yellow', alpha=0.6, label='Have Diabetes = YES')
ax[i].set_title('Histogram for '+column)
ax[i].set_xlabel(f'{column}')
ax[i].set_ylabel('number of people')
ax[i].legend(loc='upper right',frameon=True,markerscale=7,fontsize=7)
fig.tight_layout()
As you can see, the last column outcome is pretty useless, so if you don't plot that, you can also consider using seaborn:
g = sns.FacetGrid(data=data.melt(id_vars="Outcome"),
col="variable",hue="Outcome",sharex=False,sharey=False,
col_wrap=4,palette=['blue','yellow'])
g = g.map(plt.hist,"value",alpha=0.7)
I think you should be plotting using axes rather than pyplot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(3,3, figsize=(9,9))
for d, ax in zip(data_list, axes.ravel()):
ax.hist(d) # or something similar
Related
I have 5 datasets that have thousands of x and y coordinates grouped by 'frame' that create 5 trajectory plots. I'd like to mark the first and last coordinates for each plot but having difficulty figuring it out. I am using Jupiter Notebook.
mean_pos1 = gr1.mean()
mean_pos2 = gr2.mean()
mean_pos3 = gr3.mean()
mean_pos4 = gr4.mean()
mean_pos5 = gr5.mean()
plt.figure()
xlim=(200, 1500)
ylim=(0, 1200)
ax1 = mean_pos1.plot(x='x', y='y',color='blue',label='Dolphin A'); ax1.set_title('mean trajectory');
ax2 = mean_pos2.plot(x='x', y='y',color='red',label='Dolphin B'); ax2.set_title('mean trajectory');
ax3 = mean_pos3.plot(x='x', y='y',color='green',label='Dolphin C'); ax3.set_title('mean trajectory');
ax4 = mean_pos4.plot(x='x', y='y',color='magenta',label='Dolphin D'); ax4.set_title('mean trajectory');
ax5 = mean_pos5.plot(x='x', y='y',color='cyan',label='Dolphin E'); ax5.set_title('mean trajectory');
ax1.set_xlim(xlim)
ax1.set_ylim(ylim)
ax2.set_xlim(xlim)
ax2.set_ylim(ylim)
ax3.set_xlim(xlim)
ax3.set_ylim(ylim)
ax4.set_xlim(xlim)
ax4.set_ylim(ylim)
ax5.set_xlim(xlim)
ax5.set_ylim(ylim)
plt.show()
the output of them looks like this:
Use the scatter method to plot the markers separately on the same axis by grabbing the first and last elements from your x and y series:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'x': np.random.normal(3,0.2,10), 'y': np.random.normal(5,0.3,10)})
fig, ax = plt.subplots()
df.plot(x='x', y='y', ax=ax)
ax.scatter(df['x'].iloc[0], df['y'].iloc[0], marker='o', color='red')
ax.scatter(df['x'].iloc[-1], df['y'].iloc[-1], marker='o', color='red')
plt.show()
I have a dataframe that I'd like to use to build a scatterplot where different points have different colors:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
dat=pd.DataFrame(np.random.rand(20, 2), columns=['x','y'])
dat['c']=np.random.randint(0,100,20)
dat['c_norm']=(dat['c']-dat['c'].min())/(dat['c'].max()-dat['c'].min())
dat['group']=np.append(np.repeat('high',10), np.repeat('low',10))
As you can see, the column c_norm shows the c column has been normalized between 0 and 1. I would like to show a continuous legend whose color range reflect the normalized values, but labeled using the original c values as label. Say, the minimum (1), the maximum (86), and the median (49). I also want to have differing markers depending on group.
So far I was able to do this:
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1)
for row in dat.index:
if(dat.loc[row,'group']=='low'):
i_marker='.'
else:
i_marker='x'
ax.scatter(
x=dat.loc[row,'x'],
y=dat.loc[row,'y'],
s=50, alpha=0.5,
marker=i_marker
)
ax.legend(dat['c_norm'], loc='center right', bbox_to_anchor=(1.5, 0.5), ncol=1)
Questions:
- How to generate a continuous legend based on the values?
- How to adapt its ticks to show the original ticks in c, or at least a min, max, and mean or median?
Thanks in advance
Partial answer. Do you actually need to determine your marker colors based on the normed values? See the output of the snippet below.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dat = pd.DataFrame(np.random.rand(20, 2), columns=['x', 'y'])
dat['c'] = np.random.randint(0, 100, 20)
dat['c_norm'] = (dat['c'] - dat['c'].min()) / (dat['c'].max() - dat['c'].min())
dat['group'] = np.append(np.repeat('high', 10), np.repeat('low', 10))
fig, (ax, bx) = plt.subplots(nrows=1, ncols=2, num=0, figsize=(16, 8))
mask = dat['group'] == 'low'
scat = ax.scatter(dat['x'][mask], dat['y'][mask], s=50, c=dat['c'][mask],
marker='s', vmin=np.amin(dat['c']), vmax=np.amax(dat['c']),
cmap='plasma')
ax.scatter(dat['x'][~mask], dat['y'][~mask], s=50, c=dat['c'][~mask],
marker='X', vmin=np.amin(dat['c']), vmax=np.amax(dat['c']),
cmap='plasma')
cbar = fig.colorbar(scat, ax=ax)
scat = bx.scatter(dat['x'][mask], dat['y'][mask], s=50, c=dat['c_norm'][mask],
marker='s', vmin=np.amin(dat['c_norm']),
vmax=np.amax(dat['c_norm']), cmap='plasma')
bx.scatter(dat['x'][~mask], dat['y'][~mask], s=50, c=dat['c_norm'][~mask],
marker='X', vmin=np.amin(dat['c_norm']),
vmax=np.amax(dat['c_norm']), cmap='plasma')
cbar2 = fig.colorbar(scat, ax=bx)
plt.show()
You could definitely modify the second colorbar so that it matches the first one, but is that necessary?
i wanted to know how to make a plot with two y-axis so that my plot that looks like this :
to something more like this by adding another y-axis :
i'm only using this line of code from my plot in order to get the top 10 EngineVersions from my data frame :
sns.countplot(x='EngineVersion', data=train, order=train.EngineVersion.value_counts().iloc[:10].index);
I think you are looking for something like:
import matplotlib.pyplot as plt
x = [1,2,3,4,5]
y = [1000,2000,500,8000,3000]
y1 = [1050,3000,2000,4000,6000]
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.bar(x, y)
ax2.plot(x, y1, 'o-', color="red" )
ax1.set_xlabel('X data')
ax1.set_ylabel('Counts', color='g')
ax2.set_ylabel('Detection Rates', color='b')
plt.show()
Output:
#gdubs If you want to do this with Seaborn's library, this code set up worked for me. Instead of setting the ax assignment "outside" of the plot function in matplotlib, you do it "inside" of the plot function in Seaborn, where ax is the variable that stores the plot.
import seaborn as sns # Calls in seaborn
# These lines generate the data to be plotted
x = [1,2,3,4,5]
y = [1000,2000,500,8000,3000]
y1 = [1050,3000,2000,4000,6000]
fig, ax1 = plt.subplots() # initializes figure and plots
ax2 = ax1.twinx() # applies twinx to ax2, which is the second y axis.
sns.barplot(x = x, y = y, ax = ax1, color = 'blue') # plots the first set of data, and sets it to ax1.
sns.lineplot(x = x, y = y1, marker = 'o', color = 'red', ax = ax2) # plots the second set, and sets to ax2.
# these lines add the annotations for the plot.
ax1.set_xlabel('X data')
ax1.set_ylabel('Counts', color='g')
ax2.set_ylabel('Detection Rates', color='b')
plt.show(); # shows the plot.
Output:
Seaborn output example
You could try this code to obtain a very similar image to what you originally wanted.
import seaborn as sb
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle
x = ['1.1','1.2','1.2.1','2.0','2.1(beta)']
y = [1000,2000,500,8000,3000]
y1 = [3,4,1,8,5]
g = sb.barplot(x=x, y=y, color='blue')
g2 = sb.lineplot(x=range(len(x)), y=y1, color='orange', marker='o', ax=g.axes.twinx())
g.set_xticklabels(g.get_xticklabels(), rotation=-30)
g.set_xlabel('EngineVersion')
g.set_ylabel('Counts')
g2.set_ylabel('Detections rate')
g.legend(handles=[Rectangle((0,0), 0, 0, color='blue', label='Nontouch device counts'), Line2D([], [], marker='o', color='orange', label='Detections rate for nontouch devices')], loc=(1.1,0.8))
I like to display a diagram of two data columns. The problem about it is that the legend shows only the last name l/s.
Here is my diagram:
import pandas as pd
import matplotlib.pyplot as plt
Tab = pd.read_csv('Mst01.csv', delimiter=';')
x = Tab['Nr. ']
y1 = Tab['cm']
y2 = Tab['l/s']
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot(x, y1, 'g-', label='cm')
ax2.plot(x, y2, 'b-', label='l/s')
ax1.set_xlabel('Nr.')
ax1.set_ylabel('cm', color='g')
ax2.set_ylabel('l/s', color='b')
plt.title('Mst01')
plt.legend()
plt.show()
If I do
ax1.legend()
ax2.legend()
both legends will displayed but one above the other.
By the way, is there a easyier way the get the spaces for every line of code?
Good evening!
so you got two possibilities either you add the plots together or you use fig.legend()
here is some sample code which yields to the solution
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create Example Dataframe
dummy_data = np.random.random_sample((100,2))
df = pd.DataFrame(dummy_data, columns=['Col_1', 'Col_2'])
df.Col_2 = df.Col_2*100
# Create Figure
fig, ax = plt.subplots()
col_1 = ax.plot(df.Col_1, label='Col_1', color='green')
ax_2 = ax.twinx()
col_2 = ax_2.plot(df.Col_2, label='Col_2', color='r')
# first solution
lns = col_1+col_2
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='upper right')
# secound solution
fig.legend()
fig
The solution can be derived from this question.
What do you mean by spaces? you mean the indention of e.g. a for loop?
import matplotlib.pyplot as plt
import seaborn as sns
rankings_by_age = star_wars.groupby("Age").agg(np.mean).iloc[:,8:]
age_first = rankings_by_age.iloc[0, :].values
age_second = rankings_by_age.iloc[1, :].values
age_third = rankings_by_age.iloc[2, :].values
age_fourth = rankings_by_age.iloc[3, :].values
fig, ax = plt.subplots(figsize=(12, 9))
ind = np.arange(6)
width = 0.2
rects_1 = ax.bar(ind, age_first, width, color=(114/255,158/255,206/255),
alpha=.8)
rects_2 = ax.bar(ind+width, age_second, width, color=
(255/255,158/255,74/255), alpha=.8)
rects_3 = ax.bar(ind+2*width, age_third, width, color=
(103/255,191/255,92/255), alpha=.8)
rects_4 = ax.bar(ind+3*width, age_fourth, width, color=
(237/255,102/255,93/255), alpha=.8)
ax.set_title("Star Wars Film Rankings by Age")
ax.set_ylabel("Ranking")
ax.set_xticks(ind)
ax.set_xticklabels(titles, rotation=45)
ax.tick_params(top='off', right='off', left='off', bottom='off')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend((rects_1[0], rects_2[0], rects_3[0], rects_4[0]), ('18-29', '30-
44', '45-60', '> 60'), title="Age")
plt.show()
I want to replicate this plot using seaborn, but I am not sure how to go about plotting multiple bars for each category. I understand how to do it using one age group at a time, but getting more than one bar per age group seems tricky. Any help would be appreciated.
Quoting the seaborn bar plot documentation, you can use the hue argument to determine which column of the dataframe the bars should be grouped by.
import seaborn.apionly as sns
import matplotlib.pyplot as plt
df = sns.load_dataset("tips")
ax = sns.barplot(data=df, x="day", y="total_bill", hue="sex")
plt.show()