Scatter plot to add Dates as data labels - python

I am plotting a scatter chart with the code below:
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Sample observations: date strings paired with their PM_25 values.
date_strings = ['2015-12-20', '2015-09-12', '2015-08-12', '2015-06-12']
PM_25 = [68, 66, 55, 46]

# Parse every string with pandas so the x values are timestamps, not text.
dates = list(map(pd.to_datetime, date_strings))

plt.scatter(dates, PM_25, s=50, c='red')
plt.show()
For each point in the scatter plot, I want to add its date as a data label, so I made these changes:
# Attempt from the question, with the loop body indented so it parses.
fig, ax = plt.subplots()
ax.scatter(dates, PM_25)
for i, txt in enumerate(dates):
    # As posted: the second argument is only the loop index, not an (x, y) point.
    ax.annotate(txt, i)
It doesn't work.
What's the right way to label them? Thank you.

You need to pass both the x and the y coordinate of the point when you annotate:
# Label every point with its date. annotate() takes the label text and the
# (x, y) position of the point being labelled.
for date, value in zip(dates, PM_25):
    # Format the timestamp explicitly so the label is a plain 'YYYY-MM-DD' string.
    ax.annotate(date.strftime('%Y-%m-%d'), (date, value))

Related

Make Bar Chart With Binned X Values Python

I want to make a bar chart with the x values binned in steps of 10. Here is my bins array: bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]. I do not want to use a histogram because I have specific y values that I want to plot, not just the frequency of the binned values. I have a pandas DataFrame with two columns: yardline_100 (these are the values being binned; they always fall between 0 and 100) and epa. I want yardline_100 on the x-axis and epa on the y-axis. How do I do this? plt.hist() only takes one argument for data, and I can't figure out how to make plt.bar() work with binned values. Any advice?
IIUC, do you want something like this?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Synthetic stand-in data: 200 random integer yard lines in [0, 100) with
# random epa values.
df = pd.DataFrame({
    'yardline_100': np.random.randint(0, 100, 200),
    'epa': np.random.random(200),
})

# Assign every yard line to a 10-wide bin labelled '0-10', '10-20', ..., '90-100'.
# include_lowest=True keeps yard line 0 in the first bin; with the default
# right-closed intervals the first bin is open on the left, so 0 would become
# NaN and silently drop out of every plot.
bin_edges = range(0, 101, 10)
df['bin'] = pd.cut(
    df['yardline_100'],
    bins=bin_edges,
    labels=[f'{lo}-{lo + 10}' for lo in bin_edges[:-1]],
    include_lowest=True,
)

# One 2x2 figure; each panel shows epa per bin with a different seaborn plot.
fig, ax = plt.subplots(2, 2, figsize=(15, 8))
ax = ax.flatten()
sns.stripplot(x='bin', y='epa', data=df, ax=ax[0])
sns.violinplot(x='bin', y='epa', data=df, ax=ax[1])
sns.boxplot(x='bin', y='epa', data=df, ax=ax[2])
sns.barplot(x='bin', y='epa', data=df, ax=ax[3])
Output:
Change bar width
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Synthetic stand-in data: 200 random integer yard lines in [0, 100) with
# random epa values.
df = pd.DataFrame({
    'yardline_100': np.random.randint(0, 100, 200),
    'epa': np.random.random(200),
})

# Bin the yard lines in steps of 10. include_lowest=True keeps yard line 0 in
# the first bin; by default that bin is open on the left and 0 would become NaN.
bin_edges = range(0, 101, 10)
df['bin'] = pd.cut(
    df['yardline_100'],
    bins=bin_edges,
    labels=[f'{lo}-{lo + 10}' for lo in bin_edges[:-1]],
    include_lowest=True,
)

# Single bar plot of epa per bin.
fig, ax = plt.subplots(figsize=(15, 8))
sns.barplot(x='bin', y='epa', data=df, ax=ax)
def change_width(ax, new_value):
    """Set every patch in ``ax.patches`` to ``new_value`` wide, keeping it centred.

    Args:
        ax: Object exposing a ``patches`` iterable whose items provide
            ``get_width``, ``set_width``, ``get_x`` and ``set_x``.
        new_value: Width to apply to every patch.
    """
    for patch in ax.patches:
        current_width = patch.get_width()
        diff = current_width - new_value

        # Apply the new bar width.
        patch.set_width(new_value)

        # Shift the left edge by half the width change so the bar's centre
        # stays where it was.
        patch.set_x(patch.get_x() + diff * .5)
change_width(ax, 1.)
Output:

trying to convert iso 8601 time to be used with matplotlib

I'm trying to read a CSV file that has a date that looks like this: 2018-02-20T22:41:33.793000Z, and I can't figure out how to use that as the x-axis in matplotlib. I obviously have no idea what I'm doing with datetime, because I can't seem to turn it into useful numbers for matplotlib.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
import os
import matplotlib
import matplotlib.dates as mdates
import datetime
from datetime import datetime
# Load new.csv from the directory that contains this script.
data = pd.read_csv(os.path.join(os.path.dirname(__file__), 'new.csv'))
# One figure with a single 3D axes for the scatter plot.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
b = data.iloc[:, 8] #reading from column 8
# Parse timestamps written like 2018-02-20T22:41:33.793000Z.
# NOTE(review): with errors='ignore' the input is returned unchanged when
# parsing fails, so `a` may still hold strings — confirm via the print below.
a = pd.to_datetime(b, format='%Y-%m-%dT%H:%M:%S.%fZ', errors='ignore')
print(a)
x = a #date #.iloc[:, 0] grabs column without header
y = data.iloc[:, 2] #number #reading from column 2
z = data.iloc[:, 6] #quantity #reading from column 6
#c = data.bluered #blue or red
ax.scatter(x, y, z) #ax.scatter(x, y, z, c=c)
# NOTE(review): these x limits are plain numbers while x holds the dates —
# per the follow-up text this range is what had to change.
plt.xlim(-1, 35)
plt.ylim(0, 300)
ax.set_zlim(-1,150)
plt.show()
After much frustration, I found that the fix was simply to change plt.xlim(-1, 35) to plt.xlim('2018-02-20 22:41:26.419000','2018-02-20 22:48:55.768000'), which is the date/time range I was looking at.

Matplotlib plot legend shows markers twice

The legend in my plot shows the marker icon twice for each entry.
The code that produced this plot is given below
import pandas as pd
import random
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
N = 15
# One distinct rainbow colour per point.
colors = cm.rainbow(np.linspace(0, 1, N))

# Build N named points at random positions, then wrap them in a DataFrame.
df = []
for i in range(N):
    s = 'NAME %d' % i
    df.append(dict(x=random.random(), y=random.random(), name=s))
df = pd.DataFrame(df)

c = 0
labels = []
fig, ax = plt.subplots(figsize=(12,12))
# Plot the first row of every name group as a single marker in its own colour.
for name, group in df.groupby('name'):
    x = group['x'].values[0]
    y = group['y'].values[0]
    color = colors[c]
    c += 1
    ax.plot(x, y, color=color, marker='o', linestyle='', label=name)
    labels.append(name)

# Build the legend from the plotted handles and the collected group names.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles, labels)
Why is this happening?
My actual df has multiple entries for each name so that's why I do a groupby. Is there something I'm missing here?
You can either set numpoints directly, e.g. plt.legend(loc=..., numpoints=1), or create a style sheet and set legend.numpoints : 1 there.
If you use a Linux system, place your style sheets in ~/.config/matplotlib/stylelib/; you can then use them with plt.style.use([your_style_sheet]). Additionally, you can split the settings across several sheets, e.g. one for the colors and one for the size: plt.style.use([my_colors,half_column_latex])

Visualize 3 columns as a heatmap in seaborn / pandas [duplicate]

I need to create a Matplotlib heatmap (pcolormesh) using a pandas DataFrame time-series column (df_all.ts) as my X-axis.
How can I convert a pandas time-series column into something that can be used as the X-axis in the np.meshgrid(x, y) function to create the heatmap? My workaround is to create a Matplotlib drange using the same parameters as the pandas column, but is there a simpler way?
# Hourly pandas date range from the earliest to the latest value in df_all.ts.
x = pd.date_range(df_all.ts.min(),df_all.ts.max(),freq='H')
# Workaround from the question: build the hourly x values with mdates.drange.
xt = mdates.drange(df_all.ts.min(), df_all.ts.max(), dt.timedelta(hours=1))
# NOTE(review): arange, ylen, mdates and dt are not defined in this snippet —
# presumably numpy.arange, matplotlib.dates and datetime; confirm.
y = arange(ylen)
# Coordinate grids built from the drange values and the y positions.
X,Y = np.meshgrid(xt, y)
I do not know what you mean by heat map for a time series, but for a dataframe you may do as below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product
from string import ascii_uppercase
from matplotlib import patheffects
m, n = 4, 7 # 4 rows, 7 columns
# Random demo frame with columns 'A'..'G' and index 'W'..'Z'.
df = pd.DataFrame(np.random.randn(m, n),
                  columns=list(ascii_uppercase[:n]),
                  index=list(ascii_uppercase[-m:]))
# Draw the frame as an image and keep the Axes it was drawn on.
ax = plt.imshow(df, interpolation='nearest', cmap='Oranges').axes
# One tick per column and per row, labelled with the frame's own labels.
_ = ax.set_xticks(np.linspace(0, n-1, n))
_ = ax.set_xticklabels(df.columns)
_ = ax.set_yticks(np.linspace(0, m-1, m))
_ = ax.set_yticklabels(df.index)
# Turn the grid off with a real boolean: the string 'off' is truthy, and the
# string form was deprecated and is no longer treated as False by newer
# Matplotlib releases.
ax.grid(False)
# Put the column labels above the image.
ax.xaxis.tick_top()
Optionally, to print the actual values in the middle of each square, with some shadows for readability, you may do:
# Shadow effect with a white face, applied to every label for readability.
path_effects = [patheffects.withSimplePatchShadow(shadow_rgbFace=(1,1,1))]
# Visit every (row, column) cell; text() takes x first, so the column index j
# is the x position and the row index i is the y position.
for i, j in product(range(m), range(n)):
    _ = ax.text(j, i, '{0:.2f}'.format(df.iloc[i, j]),
                size='medium', ha='center', va='center',
                path_effects=path_effects)

Show values over matplotlib imshow plot [duplicate]

I need to create a Matplotlib heatmap (pcolormesh) using a pandas DataFrame time-series column (df_all.ts) as my X-axis.
How can I convert a pandas time-series column into something that can be used as the X-axis in the np.meshgrid(x, y) function to create the heatmap? My workaround is to create a Matplotlib drange using the same parameters as the pandas column, but is there a simpler way?
# Hourly pandas date range from the earliest to the latest value in df_all.ts.
x = pd.date_range(df_all.ts.min(),df_all.ts.max(),freq='H')
# Workaround from the question: build the hourly x values with mdates.drange.
xt = mdates.drange(df_all.ts.min(), df_all.ts.max(), dt.timedelta(hours=1))
# NOTE(review): arange, ylen, mdates and dt are not defined in this snippet —
# presumably numpy.arange, matplotlib.dates and datetime; confirm.
y = arange(ylen)
# Coordinate grids built from the drange values and the y positions.
X,Y = np.meshgrid(xt, y)
I do not know what you mean by heat map for a time series, but for a dataframe you may do as below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product
from string import ascii_uppercase
from matplotlib import patheffects
m, n = 4, 7 # 4 rows, 7 columns
# Random demo frame with columns 'A'..'G' and index 'W'..'Z'.
df = pd.DataFrame(np.random.randn(m, n),
                  columns=list(ascii_uppercase[:n]),
                  index=list(ascii_uppercase[-m:]))
# Draw the frame as an image and keep the Axes it was drawn on.
ax = plt.imshow(df, interpolation='nearest', cmap='Oranges').axes
# One tick per column and per row, labelled with the frame's own labels.
_ = ax.set_xticks(np.linspace(0, n-1, n))
_ = ax.set_xticklabels(df.columns)
_ = ax.set_yticks(np.linspace(0, m-1, m))
_ = ax.set_yticklabels(df.index)
# Turn the grid off with a real boolean: the string 'off' is truthy, and the
# string form was deprecated and is no longer treated as False by newer
# Matplotlib releases.
ax.grid(False)
# Put the column labels above the image.
ax.xaxis.tick_top()
Optionally, to print the actual values in the middle of each square, with some shadows for readability, you may do:
# Shadow effect with a white face, applied to every label for readability.
path_effects = [patheffects.withSimplePatchShadow(shadow_rgbFace=(1,1,1))]
# Visit every (row, column) cell; text() takes x first, so the column index j
# is the x position and the row index i is the y position.
for i, j in product(range(m), range(n)):
    _ = ax.text(j, i, '{0:.2f}'.format(df.iloc[i, j]),
                size='medium', ha='center', va='center',
                path_effects=path_effects)

Categories

Resources