Horizontal lines not appearing on matplotlib plot - python

Here is the sample data:
Datetime Price Data1 Data2 ShiftedPrice
0 2017-11-05 09:20:01.134 2123.0 12.23 34.12 300.0
1 2017-11-05 09:20:01.789 2133.0 32.43 45.62 330.0
2 2017-11-05 09:20:02.238 2423.0 35.43 55.62 NaN
3 2017-11-05 09:20:02.567 3423.0 65.43 56.62 NaN
4 2017-11-05 09:20:02.948 2463.0 45.43 58.62 NaN
I am trying to plot the ShiftedPrice column against the Datetime column and add horizontal lines for the mean and the confidence intervals of the ShiftedPrice column.
Have a look at the code below:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
df1 = df.dropna(subset=['ShiftedPrice'])
df1
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(121)
ax = df1.plot(x='Datetime',y='ShiftedPrice')
# Plotting the mean
ax.axhline(y=df1['ShiftedPrice'].mean(), color='r', linestyle='--', lw=2)
plt.show()
# Plotting Confidence Intervals
ax.axhline(y=df1['ShiftedPrice'].mean() + 1.96*np.std(df1['ShiftedPrice'],ddof=1), color='g', linestyle=':', lw=2)
ax.axhline(y=df1['ShiftedPrice'].mean() - 1.96*np.std(df1['ShiftedPrice'],ddof=1), color='g', linestyle=':', lw=2)
plt.show()
My problem is that horizontal lines are not appearing. Instead, I get the following message
ax.axhline(y=df1['ShiftedPrice'].mean(), color='r', linestyle='--', lw=2)
Out[22]: <matplotlib.lines.Line2D at 0xccc5c18>

Related

how to put label in dataframe in Density plotting in matplotlib

#dataframe
a=
timestamp count
2021-08-16 20
2021-08-17 60
2021-08-18 35
2021-08-19 1
2021-08-20 0
2021-08-21 1
2021-08-22 50
2021-08-23 36
2021-08-24 68
2021-08-25 125
2021-08-26 54
I applied this code
a.plot(kind="density")
This is not what I want.
I want to put count on the y-axis and timestamp on the x-axis with density plotting,
just like I can do with plt.bar(a['timestamp'],a['count']).
Or is this not possible with density plotting?
The following code creates a density histogram. The total area sums to 1, supposing each of the timestamps counts as 1 unit. To get the timestamps as x-axis, they are set as the index. To get the total area to sum to 1, all count values are divided by their total sum.
A KDE is calculated from the same data.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import gaussian_kde
from io import StringIO
# Sample data: one whitespace-separated row per day with its observation count.
a_str = '''timestamp count
2021-08-16 20
2021-08-17 60
2021-08-18 35
2021-08-19 1
2021-08-20 0
2021-08-21 1
2021-08-22 50
2021-08-23 36
2021-08-24 68
2021-08-25 125
2021-08-26 54'''
# sep=r'\s+' splits on runs of whitespace; it replaces delim_whitespace=True,
# which is deprecated since pandas 2.2.
a = pd.read_csv(StringIO(a_str), sep=r'\s+')
# Density histogram: dividing by the total makes the bar heights sum to 1;
# setting 'timestamp' as the index puts the dates on the x-axis.
ax = (a.set_index('timestamp') / a['count'].sum()).plot.bar(width=0.9, rot=0, figsize=(12, 5))
# The KDE is built over the row positions 0..len(a)-1, weighted by the counts.
kde = gaussian_kde(np.arange(len(a)), bw_method=0.2, weights=a['count'])
# Evaluate the curve slightly beyond the first and last row position.
xs = np.linspace(-1, len(a), 200)
ax.plot(xs, kde(xs), lw=2, color='crimson', label='kde')
ax.set_xlim(xs[0], xs[-1])
ax.legend(labels=['kde', 'density histogram'])
ax.set_xlabel('')
ax.set_ylabel('density')
plt.tight_layout()
plt.show()
If you just want to plot the kde curve, you can leave out the histogram. Optionally you can fill the area under the curve.
# KDE-only variant: no histogram, just the curve with a shaded area below it.
positions = np.arange(len(a))  # one x position per row of `a`
kde = gaussian_kde(positions, bw_method=0.2, weights=a['count'])
xs = np.linspace(-1, len(a), 200)
density = kde(xs)  # evaluated once, reused for the line and the fill

fig, ax = plt.subplots(figsize=(12, 5))
# plot the kde curve
ax.plot(xs, density, lw=2, color='crimson', label='kernel density estimation')
# optionally fill the area below the curve
ax.fill_between(xs, density, color='crimson', alpha=0.2)
# label every row position with its timestamp
ax.set_xticks(positions)
ax.set_xticklabels(a['timestamp'])
ax.set_xlim(xs[0], xs[-1])
ax.set_ylim(ymin=0)
ax.set_xlabel('')
ax.set_ylabel('density')
ax.legend()
plt.tight_layout()
plt.show()
To plot multiple similar curves, for example using more count columns, you can use a loop. A list of colors that go well together could be obtained from the Set2 colormap:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import gaussian_kde
# Example frame: eleven daily timestamps plus four randomly generated count columns.
a = pd.DataFrame({'timestamp': ['2021-08-16', '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20', '2021-08-21',
                                '2021-08-22', '2021-08-23', '2021-08-24', '2021-08-25', '2021-08-26']})
for i in range(1, 5):
    a[f'count{i}'] = (np.random.uniform(0, 12, len(a)) ** 2).astype(int)
xs = np.linspace(-1, len(a), 200)
fig, ax = plt.subplots(figsize=(12, 4))
# One KDE curve per count column; zip pairs each column with a Set2 color.
for column, color in zip(a.columns[1:], plt.cm.Set2.colors):
    kde = gaussian_kde(np.arange(len(a)), bw_method=0.2, weights=a[column])
    density = kde(xs)  # evaluate once, reuse for the line and the fill
    ax.plot(xs, density, lw=2, color=color, label=f"kde of '{column}'")
    ax.fill_between(xs, density, color=color, alpha=0.2)
# Axis cosmetics are applied once, after all curves are drawn.
ax.set_xticks(np.arange(len(a)))
ax.set_xticklabels(a['timestamp'])
ax.set_xlim(xs[0], xs[-1])
ax.set_ylim(ymin=0)
ax.legend()
ax.set_xlabel('Date')
ax.set_ylabel('Density of Counts')
plt.tight_layout()
plt.show()

Matplotlib x-axis limited range

So I've been trying to plot some data, but the x-axis is limited to two years. My question is pretty simple: can someone explain why the x-axis is limited to the date range 2015Q1 - 2017Q1 when the available data covers 2015Q1 - 2020Q1? Is there something missing or incorrect in my code?
dd2
qtr median count
0 2015Q1 1290000.0 27
1 2015Q2 1330000.0 43
2 2015Q3 1570000.0 21
3 2015Q4 1371000.0 20
4 2016Q1 1386500.0 20
5 2016Q2 1767500.0 22
6 2016Q3 1427500.0 32
7 2016Q4 1501000.0 31
8 2017Q1 1700000.0 29
9 2017Q2 1630000.0 15
10 2017Q3 1687500.0 24
11 2017Q4 1450000.0 15
12 2018Q1 1505000.0 13
13 2018Q2 1494000.0 14
14 2018Q3 1415000.0 21
15 2018Q4 1150000.0 15
16 2019Q1 1228000.0 15
17 2019Q2 1352500.0 12
18 2019Q3 1237500.0 12
19 2019Q4 1455000.0 26
20 2020Q1 1468000.0 9
code
x = dd2['qtr']
y1 = dd2['count']
y2 = dd2['median']
fig, ax = plt.subplots(figsize=(40,10))
ax = plt.subplot(111)
ax2 = ax.twinx()
y1_plot = y1.plot(ax=ax2, color='green', legend=True, marker='*', label="median")
y2_plot = y2.plot(ax=ax, color='red', legend=True, linestyle='--', marker='x', label="count")
plt.title('Price trend analysis')
ax.set_xticklabels(x, rotation='vertical',color='k', size=20)
ax.set_xlabel('year')
ax.set_ylabel('sold price')
ax2.set_ylabel('number of sales')
y1_patch = mpatches.Patch(color='red', label='median sold price')
y2_patch = mpatches.Patch(color='green', label='count')
plt.legend(handles=[y2_patch,y1_patch],loc='upper right')
plt.savefig('chart.png', dpi=300,bbox_inches ='tight')
plt.show()
Use matplotlib.ticker (mtick) to put a tick at every x-axis data point:
import matplotlib.ticker as mtick
ax.xaxis.set_major_locator(mtick.IndexLocator(base=1, offset=0))
Instead of going through Pandas' Series plotting methods, I'd use pyplot to plot your x and y data together, like this:
# everything is the same up to 'ax2 = ax.twinx()'
# plot on your axes, save a reference to the line
# y2 holds the median price, so it goes on `ax` (labelled 'sold price');
# y1 holds the sales count, so it goes on the twin axis `ax2`.
line1 = ax.plot(x, y2, color="red", label="median sold price", linestyle='--', marker='x')
line2 = ax2.plot(x, y1, color="green", label="count", marker='*')
# no need for messing with patches: ax.plot returns a list of lines, so the
# two lists can be concatenated and passed to a single legend
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax.legend(lines, labels, loc='upper right')
# this is the same as before again
plt.title('Price trend analysis')
# label* keywords style the tick labels (rotation, text color, font size);
# plain `color`/`size` would change the tick marks instead.
ax.xaxis.set_tick_params(labelrotation=90, labelcolor='k', labelsize=20)
ax.set_xlabel('year')
ax.set_ylabel('sold price')
ax2.set_ylabel('number of sales')
plt.savefig('chart.png', dpi=300,bbox_inches ='tight')
plt.show()

Why are bars missing in my stacked bar chart -- Python w/matplotlib

all.
I am trying to create a stacked bar chart built using time series data. My issue -- if I plot my data as time series (using lines) then everything works fine and I get a (messy) time series graph that includes correct dates. However, if I instead try to plot this as a stacked bar chart, my dates disappear and none of my bars appear.
I have tried messing with the indexing, height, and width of the bars. No luck.
Here is my code:
import pylab
import pandas as pd
import matplotlib.pyplot as plt
df1= pd.read_excel('pathway/filename.xls')
df1.set_index('TIME', inplace=True)
ax = df1.plot(kind="Bar", stacked=True)
ax.set_xlabel("Date")
ax.set_ylabel("Change in Yield")
df1.sum(axis=1).plot( ax=ax, color="k", title='Historical Decomposition -- 1 year -- One-Quarter Revision')
plt.axhline(y=0, color='r', linestyle='-')
plt.show()
If i change
ax = df1.plot(kind="Bar", stacked=True)
to ax = df1.plot(kind="line", stacked=False)
I get:
if instead I use ax = df1.plot(kind="Bar", stacked=True)
I get:
Any thoughts here?
Without knowing what the data looks like, I'd try something like this:
#Import data here and generate DataFrame
print(df.head(5))
A B C D
DATE
2020-01-01 -0.01 0.06 0.40 0.45
2020-01-02 -0.02 0.05 0.39 0.42
2020-01-03 -0.03 0.04 0.38 0.39
2020-01-04 -0.04 0.03 0.37 0.36
2020-01-05 -0.05 0.02 0.36 0.33
f, ax = plt.subplots()
# Every artist gets an explicit label; without labels ax.legend() has nothing
# to show and matplotlib warns that no labelled artists were found.
ax.bar(df.index, df['A'], label='A')
ax.bar(df.index, df['B'], label='B')
# 'C' is stacked on top of 'B' via bottom=
ax.bar(df.index, df['C'], bottom=df['B'], label='C')
ax.plot(df.index, df['D'], color='black', linewidth=2, label='D')
ax.set_xlabel('Date')
ax.set_ylabel('Change in Yield')
ax.axhline(y=0, color='r')
ax.set_xticks([])
ax.legend()
plt.show()
Edit:: Ok, I've found a way looking at this post here:
Plot Pandas DataFrame as Bar and Line on the same one chart
Try resetting the index so that it is a separate column. In my example, it is called 'DATE'. Then try:
# Draw column 'D' as a black line against 'DATE' and keep the returned axes.
ax = df[['DATE','D']].plot(x='DATE',color='black')
# Draw columns 'A', 'B' and 'C' as stacked bars on that same axes (ax=ax).
df[['DATE','A','B','C']].plot(x='DATE', kind='bar',stacked=True,ax=ax)
# Red horizontal reference line at y=0.
ax.axhline(y=0, color='r')
# An empty tick list removes the x-axis ticks.
ax.set_xticks([])
ax.set_xlabel('Date')
ax.set_ylabel('Change in Yield')
ax.legend()
plt.show()

Seaborn stripplot deletes plot

I have a boxplot:
fig, ax = plt.subplots(1,1)
bp = df.boxplot(column='transaction_value',
by='store_type', grid=True,
ax=ax, showfliers=True)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
ax.set_ylim([0, 800])
ax.set_ylabel('transaction_value')
plt.show()
I have a seaborn stripplot:
bplot=sns.stripplot(y='transaction_value', x='store_type',
data=df,
jitter=True,
marker='o',
alpha=0.1,
color='black')
When I try to overlay the stripplot on the boxplot, it deletes the first boxplot (on the very far left).
fig, ax = plt.subplots(1,1)
bp = df.boxplot(column='transaction_value',
by='store_type', grid=True,
ax=ax, showfliers=True)
bplot=sns.stripplot(y='transaction_value', x='store_type',
data=df,
jitter=True,
marker='o',
alpha=0.1,
color='black')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
ax.set_ylim([0, 500])
ax.set_ylabel('transaction_value')
plt.show()
How can I stop this from happening?
Added data example:
a
transaction_value store_type
0 30.927648 express
1 20.356693 extra
2 48.201950 metro
3 77.213957 metro
4 15.482211 superstore
5 85.794876 superstore
6 16.199844 extra
7 0.007816 superstore
8 50.925737 metro
9 81.393811 metro
10 7.616312 superstore
11 82.172441 metro
12 49.608503 extra
13 71.907878 metro
14 85.833738 superstore
15 88.131029 express
16 11.541427 extra
17 89.759724 metro
18 96.435902 superstore
19 91.984656 superstore
20 67.795293 metro
21 39.806654 superstore
22 39.565823 metro
23 37.507718 superstore
24 37.918300 metro
25 18.599158 metro
26 3.815219 extra
27 83.210068 express
28 3.988503 extra
29 94.298953 superstore
a = pd.read_clipboard()
fig, ax = plt.subplots(1,1)
bp = a.boxplot(column='transaction_value',
by='store_type', grid=True,
ax=ax, showfliers=True)
bplot=sns.stripplot(y='transaction_value', x='store_type',
data=a,
jitter=True,
marker='o',
alpha=0.1,
color='black')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
ax.set_ylim([0, 500])
ax.set_ylabel('transaction_value')
plt.show()
@ImportanceOfBeingErnest provided a solution in the comments while I was typing, but I was going to suggest something else:
For better consistency, I would recommend using seaborn for the boxplots as well; this should ensure that both plots are laid out the same way:
# One figure with a single axes; both seaborn calls below draw onto it via ax=ax.
fig, ax = plt.subplots(1,1)
# Boxplot of transaction_value per store_type, drawn with color 'w'.
sns.boxplot(y='transaction_value', x='store_type', data=df, ax=ax,
color='w')
# Jittered, mostly transparent (alpha=0.1) black points drawn over the boxes.
sns.stripplot(y='transaction_value', x='store_type', data=df, ax=ax,
jitter=True,
marker='o',
alpha=0.1,
color='black')
ax.set_ylabel('transaction_value')
plt.show()

Adjusting varying thickness of bar in seaborn barplots for subplots

In a seaborn horizontal barplot that has two sets of bars, with one set placed on top of the other, how can the axes of each set be controlled independently? I want to adjust the thickness of the bars based on the frequency of occurrence of some entity.
Currently both the barplots are plotted with axes stored in ax1 and ax2. But I am able to adjust the thickness of the bar only for ax1 (lightblue in colour), but not for ax2 (dark blue. All bars have uniform thickness). I am not able to figure out how the assignment of ax2 needs to be done so as to adjust the bar thickness for the second set of bars as well.
How can varying length bars for both the barplots be obtained?
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")
f, ax = plt.subplots(figsize=(15, 45))
crashes = plotie.groupby('target_wcount').mean()
sns.set_color_codes("pastel")
ax1 = sns.barplot(x="uno", y="indie", orient='h', data=crashes,
label="uno", color="b")
sns.set_color_codes("muted")
ax2 = sns.barplot(x="miss", y="indie", orient='h', data=crashes,
label="miss", color="b")
for bar, newwidth in zip(ax1.patches, summa):
bar.set_height(3*newwidth)
for bar, newwidth in zip(ax2.patches, summa):
bar.set_height(3*newwidth)
sns.despine(left=True, bottom=True)
f.savefig('filea')
Sample For data
output_wcount missing_count match_count uni indie uno miss
target_wcount
49 49.0 39.440000 9.560000 1.0 49 1.0 0.804898
48 48.0 36.730000 11.270000 1.0 48 1.0 0.765208
46 46.0 34.400000 11.600000 1.0 46 1.0 0.747826
45 45.0 33.940000 11.060000 1.0 45 1.0 0.754222
44 44.0 34.630000 9.370000 1.0 44 1.0 0.787045
43 43.0 31.420000 11.580000 1.0 43 1.0 0.730698
42 42.0 31.455000 10.545000 1.0 42 1.0 0.748929
41 41.0 29.630000 11.370000 1.0 41 1.0 0.722683
40 40.0 28.430000 11.570000 1.0 40 1.0 0.710750
39 39.0 27.935556 11.064444 1.0 39 1.0 0.716296
By using the twinx function this could be easily solved
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")
f, ax = plt.subplots(figsize=(15, 45))
# NOTE(review): `plotie` and `summa` are not defined in this snippet; they are
# assumed to be the asker's DataFrame and per-bar thickness values.
crashes = plotie.groupby('target_wcount').mean()
# First set of bars ("uno", pastel blue) on the primary axes.
sns.set_color_codes("pastel")
ax1 = sns.barplot(x="uno", y="indie", orient='h', data=crashes,
                  label="uno", color="b", ax=ax)
# Second set of bars ("miss", muted blue) on a twin axes, so that its patches
# live in ax2.patches and can be resized separately from those of ax1.
sns.set_color_codes("muted")
ax2 = ax.twinx()
sns.barplot(x="miss", y="indie", orient='h', data=crashes,
            label="miss", color="b", ax=ax2)
# For horizontal bars the patch height is the bar thickness.
for bar, newwidth in zip(ax1.patches, summa):
    bar.set_height(3*newwidth)
for bar, newwidth in zip(ax2.patches, summa):
    bar.set_height(3*newwidth)
sns.despine(left=True, bottom=True)
f.savefig('filea')

Categories

Resources