Bar plot for multidimensional columns using pandas - python

I want to plot my dataframe (df) as a bar plot based on the time columns, where each bar represents the value counts() for each letter that appears in the column.
Expected output
.
date,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00
2002-02-01,Y,Y,U,N,N
2002-02-02,U,N,N,N,N
2002-02-03,N,N,N,N,N
2002-02-04,N,N,N,N,N
2002-02-05,N,N,N,N,N
When I select individual time columns, I can do as below
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
df = pd.read_csv('df.csv')
df = df['04:00:00'].value_counts()
df.plot(kind='bar')
plt.show()
How can I plot all the columns on the same bar plot as shown on the expected output.

One possible solution is:
pd.DataFrame({t: df[t].value_counts() for t in df.columns if t != "date"}).T.plot.bar()

Here is an approach via seaborn's catplot:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from io import StringIO
df_str = '''date,00:00:00,01:00:00,02:00:00,03:00:00,04:00:00
2002-02-01,Y,Y,U,N,N
2002-02-02,U,N,N,N,N
2002-02-03,N,N,N,N,N
2002-02-04,N,N,N,N,N
2002-02-05,N,N,N,N,N'''
df = pd.read_csv(StringIO(df_str))
df_long = df.set_index('date').melt(var_name='hour', value_name='kind')
g = sns.catplot(kind='count', data=df_long, x='kind', palette='mako',
col='hour', col_wrap=5, height=3, aspect=0.5)
for ax in g.axes.flat:
ax.set_xlabel(ax.get_title()) # use the title as xlabel
ax.grid(True, axis='y')
ax.set_title('')
if len(ax.get_ylabel()) == 0:
sns.despine(ax=ax, left=True) # remove left axis for interior subplots
ax.tick_params(axis='y', size=0)
plt.tight_layout()
plt.show()

Related

Seaborn xaxis with large timeline

I have around 4475 rows of csv data like below:
,Time,Values,Size
0,1900-01-01 23:11:30.368,2,
1,1900-01-01 23:11:30.372,2,
2,1900-01-01 23:11:30.372,2,
3,1900-01-01 23:11:30.372,2,
4,1900-01-01 23:11:30.376,2,
5,1900-01-01 23:11:30.380,,
6,1900-01-01 23:11:30.380,,
7,1900-01-01 23:11:30.380,,
8,1900-01-01 23:11:30.380,,321
9,1900-01-01 23:11:30.380,,111
.
.
4474,1900-01-01 23:11:32.588,,
When I try to create simple seaborn lineplot with below code. It creates line chart but its continuous chart while my data i.e. 'Values' has many empty/nan values which should show as gap on chart. How can I do that?
[from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
sns.set(rc={'figure.figsize':(13,4)})
ax =sns.lineplot(x="Time", y="Values", data=df)
ax.set(xlabel='Time', ylabel='Values')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()]
As reported in this answer:
I've looked at the source code and it looks like lineplot drops nans from the DataFrame before plotting. So unfortunately it's not possible to do it properly.
So, the easiest way to do it is to use matplotlib in place of seaborn.
In the code below I generate a dataframe like your with 20% of missing values in 'Values' column and I use matplotlib to draw a plot:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame({'Time': pd.date_range(start = '1900-01-01 23:11:30', end = '1900-01-01 23:11:30.1', freq = 'L')})
df['Values'] = np.random.randint(low = 2, high = 10, size = len(df))
df['Values'] = df['Values'].mask(np.random.random(df['Values'].shape) < 0.2)
fig, ax = plt.subplots(figsize = (13, 4))
ax.plot(df['Time'], df['Values'])
ax.set(xlabel = 'Time', ylabel = 'Values')
plt.xticks(rotation = 90)
plt.tight_layout()
plt.show()

Dataframe Bar plot with Seaborn

I'm trying to create a bar plot from a DataFrame with Datetime Index.
This is an example working code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
index = pd.date_range('2012-01-01', periods=48, freq='M')
data = np.random.randint(100, size = (len(index),1))
df = pd.DataFrame(index=index, data=data, columns=['numbers'])
fig, ax = plt.subplots()
ax.bar(df.index, df['numbers'])
The result is:
As you can see the white bars cannot be distinguished well with respect of the background (why?).
I tried using instead:
df['numbers'].plot(kind='bar')
import matplotlib.ticker as ticker
ticklabels = df.index.strftime('%Y-%m')
ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
with this result:
But in this way I lose the automatic xticks labels (and grid) 6-months spacing.
Any idea?
You can just change the style:
import matplotlib.pyplot as plt
index = pd.date_range('2012-01-01', periods=48, freq='M')
data = np.random.randint(100, size = (len(index),1))
df = pd.DataFrame(index=index, data=data, columns=['numbers'])
plt.figure(figsize=(12, 5))
plt.style.use('default')
plt.bar(df.index,df['numbers'],color="red")
You do not actually use seaborn. Replace ax.bar(df.index, df['numbers'])
with
sns.barplot(df.index, df['numbers'], ax=ax)

Seaborn - Display Last Value / Label

I would like create an plot with to display the last value on line. But i can not create the plot with the last value on chart. Do you have an idea for to resolve my problem, thanks you !
Input :
DataFrame
Plot
Output :
Cross = Last Value In columns
Output Final
# import eikon as ek
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import seaborn as sns; sns.set()
import pylab
from scipy import *
from pylab import *
fichier = "P:/GESTION_RPSE/GES - Gestion Epargne Salariale/Dvp Python/Florian/Absolute
Performance/PLOT.csv"
df = pd.read_csv(fichier)
df = df.drop(columns=['Unnamed: 0'])
# sns.set()
plt.figure(figsize=(16, 10))
df = df.melt('Date', var_name='Company', value_name='Value')
#palette = sns.color_palette("husl",12)
ax = sns.lineplot(x="Date", y="Value", hue='Company', data=df).set_title("LaLaLa")
plt.show()
Do you just want to put an 'X' at the end of your lines?
If so, you could pass markerevery=[-1] to the call to lineplot(). However there are a few caveats:
You have to use style= instead of hue= otherwise, there are no markers drawn
Filled markers work better than unfilled markers (like "x"). You can just use markers=True to use the default markers, or pass a list markers=['s','d','o',etc...]
code:
fmri = sns.load_dataset("fmri")
fig, ax = plt.subplots()
ax = sns.lineplot(x="timepoint", y="signal",
style="event", data=fmri, ci=None, markers=True, markevery=[-1], markersize=10)

How to change xticks to yearly interval in pandas time series plot

I am very new to pandas, and I have searched many StackOverflow questions similar to this for changing xtick labels yearly, but they all are different did not solve my problem, so I decided to ask my own question.
Here is my question. I have a mock data frame which I want to plot yearly xticks in the x-axis.
import numpy as np
import pandas as pd
df = pd.DataFrame({'date': pd.date_range('1991-01-01','2019-01-01')}).set_index('date')
df['value'] = np.random.randn(len(df))
df.plot()
This gives:
Xticks ==> 1995 2000 2005 etc
But I want ==> 1991 1992 ... 2019
How to do that?
So far I have tried this:
import matplotlib
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
fig,ax = plt.subplots()
df.plot(ax=ax)
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(base=1))
# ax.xaxis.set_minor_locator(matplotlib.dates.YearLocator(base=1))
# ax.set_xticklabels(list(df.index.time))
This gives just 2005 as xtick and nothing has worked till now.
Links I looked:
- Changing xticks in a pandas plot
- Python: Change the time on xticks for Pandas Plot
- https://matplotlib.org/3.1.1/api/dates_api.html
You need to use the x_compat=True argument to have pandas choose the units in a way that they are compatible with matplotlib.dates locators and formatters.
df.plot(ax=ax, x_compat=True)
Complete code:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
df = pd.DataFrame({'date': pd.date_range('1991-01-01','2019-01-01')}).set_index('date')
df['value'] = np.random.randn(len(df))
fig,ax = plt.subplots()
df.plot(ax=ax, x_compat=True)
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(base=1))
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))
plt.show()
You can try this:
import datetime
# create xticks
xticks = pd.date_range(datetime.datetime(1990,1,1), datetime.datetime(2020,1,1), freq='YS')
# plot
fig, ax = plt.subplots(figsize=(12,8))
df['value'].plot(ax=ax,xticks=xticks.to_pydatetime())
ax.set_xticklabels([x.strftime('%Y') for x in xticks]);
plt.xticks(rotation=90);
Complete Example
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import datetime
# data
df = pd.DataFrame({'date': pd.date_range('1991-01-01','2019-01-01')}).set_index('date')
df['value'] = np.random.randn(len(df))
# create xticks
xticks = pd.date_range(datetime.datetime(1990,1,1), datetime.datetime(2020,1,1), freq='YS')
# plot
fig, ax = plt.subplots(figsize=(12,8))
df['value'].plot(ax=ax,xticks=xticks.to_pydatetime())
ax.set_xticklabels([x.strftime('%Y') for x in xticks]);
plt.xticks(rotation=90);
plt.show()
This gives:

Arranging multiple for loop categorical plots with Seaborn

I am creating multiple categorical plots for data frame df with a for loop:
object_bol = df.dtypes == 'object'
for catplot in df.dtypes[object_bol].index:
sns.countplot(y=catplot,data=df)
plt.show()
Output is all the plots sequenced one after the other, how do i assign this to a grid with n columns and m rows (n & m vary depending on number of objects in data frame)?
You would want to extend the example from How do I plot two countplot graphs side by side in seaborn? to more subplots.
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
df=pd.DataFrame(np.random.choice(list("abcd"), size=(100,20), p=[.4,.3,.2,.1]))
fig, axes =plt.subplots(5,4, figsize=(10,10), sharex=True)
axes = axes.flatten()
object_bol = df.dtypes == 'object'
for ax, catplot in zip(axes, df.dtypes[object_bol].index):
sns.countplot(y=catplot, data=df, ax=ax, order=np.unique(df.values))
plt.tight_layout()
plt.show()
You would get something similar without seaborn directly from pandas:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df=pd.DataFrame(np.random.choice(list("abcd"), size=(100,20), p=[.4,.3,.2,.1]))
df.apply(pd.value_counts).plot(kind="barh", subplots=True, layout=(4,5), legend=False)
plt.tight_layout()
plt.show()

Categories

Resources