I would like create an plot with to display the last value on line. But i can not create the plot with the last value on chart. Do you have an idea for to resolve my problem, thanks you !
Input :
DataFrame
Plot
Output :
Cross = Last Value In columns
Output Final
# import eikon as ek
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import seaborn as sns; sns.set()
import pylab
from scipy import *
from pylab import *
fichier = "P:/GESTION_RPSE/GES - Gestion Epargne Salariale/Dvp Python/Florian/Absolute
Performance/PLOT.csv"
df = pd.read_csv(fichier)
df = df.drop(columns=['Unnamed: 0'])
# sns.set()
plt.figure(figsize=(16, 10))
df = df.melt('Date', var_name='Company', value_name='Value')
#palette = sns.color_palette("husl",12)
ax = sns.lineplot(x="Date", y="Value", hue='Company', data=df).set_title("LaLaLa")
plt.show()
Do you just want to put an 'X' at the end of your lines?
If so, you could pass markerevery=[-1] to the call to lineplot(). However there are a few caveats:
You have to use style= instead of hue= otherwise, there are no markers drawn
Filled markers work better than unfilled markers (like "x"). You can just use markers=True to use the default markers, or pass a list markers=['s','d','o',etc...]
code:
fmri = sns.load_dataset("fmri")
fig, ax = plt.subplots()
ax = sns.lineplot(x="timepoint", y="signal",
style="event", data=fmri, ci=None, markers=True, markevery=[-1], markersize=10)
Related
I want to plot my dataframe (df) as a bar plot based on the time columns, where each bar represents the value counts() for each letter that appears in the column.
Expected output
.
date,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00
2002-02-01,Y,Y,U,N,N
2002-02-02,U,N,N,N,N
2002-02-03,N,N,N,N,N
2002-02-04,N,N,N,N,N
2002-02-05,N,N,N,N,N
When I select individual time columns, I can do as below
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
df = pd.read_csv('df.csv')
df = df['04:00:00'].value_counts()
df.plot(kind='bar')
plt.show()
How can I plot all the columns on the same bar plot as shown on the expected output.
One possible solution is:
pd.DataFrame({t: df[t].value_counts() for t in df.columns if t != "date"}).T.plot.bar()
Here is an approach via seaborn's catplot:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from io import StringIO
df_str = '''date,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00
2002-02-01,Y,Y,U,N,N
2002-02-02,U,N,N,N,N
2002-02-03,N,N,N,N,N
2002-02-04,N,N,N,N,N
2002-02-05,N,N,N,N,N'''
df = pd.read_csv(StringIO(df_str))
df_long = df.set_index('date').melt(var_name='hour', value_name='kind')
g = sns.catplot(kind='count', data=df_long, x='kind', palette='mako',
col='hour', col_wrap=5, height=3, aspect=0.5)
for ax in g.axes.flat:
ax.set_xlabel(ax.get_title()) # use the title as xlabel
ax.grid(True, axis='y')
ax.set_title('')
if len(ax.get_ylabel()) == 0:
sns.despine(ax=ax, left=True) # remove left axis for interior subplots
ax.tick_params(axis='y', size=0)
plt.tight_layout()
plt.show()
I have the following Plot
I need to add percentage inside the bars, it should be like this:
My code is the following:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
pkmn = pd.read_csv('data/Pokemon.csv')
pkmn.head()
order = pkmn['Generation'].value_counts().index
order
pkmngen = pkmn['Generation'].value_counts()
plt.figure(figsize=(6,4))
sb.countplot(data=pkmn, y='Generation', color = sb.color_palette()[4], order=order, )
plt.xticks(rotation=90)
plt.show()
I have lot of feature in data and i want to make box plot for each feature. So for that
import pandas as pd
import seaborn as sns
plt.figure(figsize=(25,20))
for data in train_df.columns:
plt.subplot(7,4,i+1)
plt.subplots_adjust(hspace = 0.5, wspace = 0.5)
ax =sns.boxplot(train_df[data])
I did this
and the output is
All the plot are on one image i want something like
( not with skew graphs but with box plot )
What changes i need to do ?
In your code, I cannot see where the i is coming from and also it's not clear how ax was assigned.
Maybe try something like this, first an example data frame:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
train_df = pd.concat([pd.Series(np.random.normal(i,1,100)) for i in range(12)],axis=1)
Set up fig and a flattened ax for each subplot:
fig,ax = plt.subplots(4,3,figsize=(10,10))
ax = ax.flatten()
The most basic would be to call sns.boxplot assigning ax inside the function:
for i,data in enumerate(train_df.columns):
sns.boxplot(train_df[data],ax=ax[i])
I'm trying to create a bar plot from a DataFrame with Datetime Index.
This is an example working code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
index = pd.date_range('2012-01-01', periods=48, freq='M')
data = np.random.randint(100, size = (len(index),1))
df = pd.DataFrame(index=index, data=data, columns=['numbers'])
fig, ax = plt.subplots()
ax.bar(df.index, df['numbers'])
The result is:
As you can see the white bars cannot be distinguished well with respect of the background (why?).
I tried using instead:
df['numbers'].plot(kind='bar')
import matplotlib.ticker as ticker
ticklabels = df.index.strftime('%Y-%m')
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
with this result:
But in this way I lose the automatic xticks labels (and grid) 6-months spacing.
Any idea?
You can just change the style:
import matplotlib.pyplot as plt
index = pd.date_range('2012-01-01', periods=48, freq='M')
data = np.random.randint(100, size = (len(index),1))
df = pd.DataFrame(index=index, data=data, columns=['numbers'])
plt.figure(figsize=(12, 5))
plt.style.use('default')
plt.bar(df.index,df['numbers'],color="red")
You do not actually use seaborn. Replace ax.bar(df.index, df['numbers'])
with
sns.barplot(df.index, df['numbers'], ax=ax)
I can plot multiple histograms in a single plot using pandas but there are few things missing:
How to give the label.
I can only plot one figure, how to change it to layout=(3,1) or something else.
Also, in figure 1, all the bins are filled with solid colors, and its kind of difficult to know which is which, how to fill then with different markers (eg. crosses,slashes,etc)?
Here is the MWE:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7,label='species')
plt.legend()
Output:
To change layout I can use by keyword, but can't give them colors
HOW TO GIVE DIFFERENT COLORS?
df.hist('sepal_length',by='species',layout=(3,1))
plt.tight_layout()
Gives:
You can resolve to groupby:
fig,ax = plt.subplots()
hatches = ('\\', '//', '..') # fill pattern
for (i, d),hatch in zip(df.groupby('species'), hatches):
d['sepal_length'].hist(alpha=0.7, ax=ax, label=i, hatch=hatch)
ax.legend()
Output:
In pandas version 1.1.0 you can simply set the legend keyword to true.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')
df.groupby('species')['sepal_length'].hist(alpha=0.7, legend = True)
output image
It's more code, but using pure matplotlib will always give you more control over the plots. For your second case:
import matplotlib.pyplot as plt
import numpy as np
from itertools import zip_longest
# Dictionary of color for each species
color_d = dict(zip_longest(df.species.unique(),
plt.rcParams['axes.prop_cycle'].by_key()['color']))
# Use the same bins for each
xmin = df.sepal_length.min()
xmax = df.sepal_length.max()
bins = np.linspace(xmin, xmax, 20)
# Set up correct number of subplots, space them out.
fig, ax = plt.subplots(nrows=df.species.nunique(), figsize=(4,8))
plt.subplots_adjust(hspace=0.4)
for i, (lab, gp) in enumerate(df.groupby('species')):
ax[i].hist(gp.sepal_length, ec='k', bins=bins, color=color_d[lab])
ax[i].set_title(lab)
# same xlim for each so we can see differences
ax[i].set_xlim(xmin, xmax)