I am trying to alter the x-ticks on the plot below. When I run the code below I'm getting an error:
ValueError: unit abbreviation w/o a number
I can't seem to find anything on this except it's related to pd.to_timedelta. However, I can't find any solutions on this.
I've upgraded all relevant packages, including matplotlib.
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings, 'B' and 'C' hold string values.
d = ({
'A' : ['08:00:00','08:10:00','08:12:00','08:26:00','08:29:00','08:31:00','10:10:00','10:25:00','10:29:00','10:31:00'],
'B' : ['1','1','1','2','2','2','7','7','7','7'],
'C' : ['X','Y','Z','X','Y','Z','A','X','Y','Z'],
})
df = pd.DataFrame(data=d)
fig,ax = plt.subplots()
x = df['A']
y = df['B']
# Convert the time strings to a number of seconds so they can be used as x positions.
x_numbers = (pd.to_timedelta(df['A']).dt.total_seconds())
plt.scatter(x_numbers, y)
xaxis = ax.get_xaxis()
# Each current major tick label's text gets ' seconds' appended and is parsed by
# pd.Timedelta; the third whitespace-separated token of its string form is the new label.
# NOTE(review): presumably this is where the reported
# "ValueError: unit abbreviation w/o a number" arises -- if a label's text is empty,
# the string handed to pd.Timedelta is just ' seconds'. Confirm.
ax.set_xticklabels([str(pd.Timedelta(i.get_text()+' seconds')).split()[2] for i in xaxis.get_majorticklabels()], rotation=45)
plt.show()
Any suggestions? Has anyone come across this?
Based on this SO question and answer, one solution is to trigger axis tick positioning with a call to fig.canvas.draw() after the scatter, but before getting the labels:
[...]
plt.scatter(x_numbers, y)
# draw canvas to trigger tick positioning
fig.canvas.draw()
xaxis = ax.get_xaxis()
[...]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = {
    'A': ['08:00:00', '08:10:00', '08:12:00', '08:26:00', '08:29:00',
          '08:31:00', '10:10:00', '10:25:00', '10:29:00', '10:31:00'],
    'B': ['1', '1', '1', '2', '2', '2', '7', '7', '7', '7'],
    'C': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'A', 'X', 'Y', 'Z'],
}
df = pd.DataFrame(data=d)
x = df['A']
y = df['B']
# Numeric x positions: each time expressed as a number of seconds.
x_numbers = pd.to_timedelta(df['A']).dt.total_seconds()

fig, axes = plt.subplots(figsize=(10, 4))
axes.scatter(x_numbers, y)
# Put exactly one tick on every data point so the labels below line up with them.
axes.set_xticks(x_numbers)
# Iterate the Series directly: Series.get_values() was removed in pandas 1.0.
axes.set_xticklabels([i + ' seconds' for i in x], rotation=90)
plt.tight_layout()
output:
Related
I am wondering how to add a marker and the corresponding value to the last point of a series.
To plot my series I use :
# Load the spreadsheet; its 'Date' and 'Values' columns supply the x and y data.
var= pd.read_excel("ExcelFilePath")
x = list(var['Date'])
y = list(var['Values'])
# Draw the series as a single labelled line.
plt.plot(x,y,label='blabla')
Which gives (for example):
How would I get this :
You could use annotate:
import numpy as np
# Sample curve standing in for the real series.
x = np.linspace(0, 6.5)
y = np.sin(x)
plt.plot(x, y, label='blabla')

# Coordinates of the final sample, reused for both the marker and its caption.
last_x, last_y = x[-1], y[-1]
plt.plot(last_x, last_y, marker='+')
# Caption shows the point as "(x, y)" with two decimals, right-aligned on the point.
plt.annotate(f'({last_x:.2f}, {last_y:.2f})', (last_x, last_y), ha='right')
output:
You could use the Plotly Library for this.
e.g.
import plotly.express as px
# Start from the bundled gapminder table and keep only the Oceania rows.
gapminder = px.data.gapminder()
df = gapminder.query("continent == 'Oceania'")

# One line per country, with a marker drawn on every data point.
fig = px.line(df, x='year', y='lifeExp', color='country', markers=True)
fig.show()
This will give you an output:
I would like to make a scatterplot with the dataframe :"df_death_mois1". But it doesn't work. The error message is : "x and y must be the same size". Can you help me ?
import pandas as pd
import matplotlib.pyplot as plt
# Load the two CSV tables (members and expeditions) from the tidytuesday repository.
members = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/members.csv")
expeditions = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/expeditions.csv")
# Parse the highpoint date and store its month number in a new 'mois' column.
expeditions['highpoint_date'] = pd.to_datetime(expeditions['highpoint_date'])
lesmois = expeditions['highpoint_date'].dt.month
expeditions["mois"] = lesmois
expeditions
# Attach each member's expedition month, then keep only rows whose death cause is "Avalanche".
df_members_mois = pd.merge(members,expeditions[['expedition_id','mois']], on='expedition_id', how='inner')
df_death_mois = df_members_mois[df_members_mois["death_cause"]=="Avalanche"]
df_death_mois
# Count avalanche deaths per month and turn the resulting Series into a one-column frame.
# NOTE: 'mois' is the groupby key, so it becomes the index of this frame, not a column.
df_death_mois1 = df_death_mois.groupby("mois")['death_cause'].count()
df_death_mois1 = df_death_mois1.to_frame()
df_death_mois1
# This is the scatter call the question reports as failing with
# "x and y must be the same size"; x="mois" refers to the index name, not a column.
plt.scatter(x="mois", y = "death_cause", data = df_death_mois1)
plt.title('scatterplot')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
reset_index and then call plot.scatter:
>>> df_death_mois1.reset_index().plot.scatter(x="mois", y="death_cause")
With matplotlib.pyplot you can use:
>>> plt.scatter(x=df_death_mois1.index, y=df_death_mois1["death_cause"])
I am trying to insert timestamps on the x-axis of a scatter plot instead of total seconds. Below is what I have tried thus far, but I'm getting an error on this line:
loc, labels = ax.set_xticks(x)
AttributeError: 'NoneType' object has no attribute 'update'
Example:
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = ({
'A' : ['08:00:00','08:10:00','08:12:00','08:26:00','08:29:00','08:31:00','10:10:00','10:25:00','10:29:00','10:31:00'],
'B' : ['1','1','1','2','2','2','7','7','7','7'],
'C' : ['X','Y','Z','X','Y','Z','A','X','Y','Z'],
})
df = pd.DataFrame(data=d)
fig,ax = plt.subplots()
x = df['A']
y = df['B']
# Convert the time strings to a number of seconds for use as numeric x positions.
x_numbers = (pd.to_timedelta(df['A']).dt.total_seconds())
ax.scatter(x_numbers, y)
# This is the line the question reports as failing. It passes the raw time strings
# (x), not the numeric positions, and unpacks the result of ax.set_xticks into two names.
loc, labels = ax.set_xticks(x)
# Intended relabelling: format each tick location (seconds) through pd.Timedelta
# and keep the third whitespace-separated token of its string form.
newlabels = [str(pd.Timedelta(str(i)+ ' seconds')).split()[2] for i in loc]
ax.set_xticks(loc, newlabels)
Note
I need to use ax instead of plt as this plot is called as a subplot. If I use plot, the axis will be assigned to the last subplot instead of the designated one.
I would suggest to use datetimes directly without messing with the ticklabels. Using a matplotlib.dates.MinuteLocator in addition can give you nice positions of the ticks.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = {
    'A': ['08:00:00', '08:10:00', '08:12:00', '08:26:00', '08:29:00', '08:31:00',
          '10:10:00', '10:25:00', '10:29:00', '10:31:00'],
    'B': ['1', '1', '1', '2', '2', '2', '7', '7', '7', '7'],
    'C': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'A', 'X', 'Y', 'Z'],
}
df = pd.DataFrame(d)
# Work with real datetimes so matplotlib's date machinery can place and format the ticks.
df['A'] = pd.to_datetime(df['A'])

fig, ax = plt.subplots()
times = df["A"]
ax.scatter(times.values, df["B"].values)
# Clamp the x-range to the first and last observation.
ax.set_xlim(times.min(), times.max())

# Ticks at minute 0 and minute 30 of each hour, rendered as hour:minute.
half_hour_locator = mdates.MinuteLocator((0, 30))
clock_formatter = mdates.DateFormatter("%H:%M")
ax.xaxis.set_major_locator(half_hour_locator)
ax.xaxis.set_major_formatter(clock_formatter)
plt.show()
I am taking a guess, but if you want to replace the x-axis labels give this a try.
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = {
    'A': ['08:00:00', '08:10:00', '08:12:00', '08:26:00', '08:29:00',
          '08:31:00', '10:10:00', '10:25:00', '10:29:00', '10:31:00'],
    'B': ['1', '1', '1', '2', '2', '2', '7', '7', '7', '7'],
    'C': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'A', 'X', 'Y', 'Z'],
}
df = pd.DataFrame(data=d)
x = df['A']
y = df['B']
# Numeric x positions: each time expressed as a number of seconds.
x_numbers = pd.to_timedelta(df['A']).dt.total_seconds()

fig, ax = plt.subplots(figsize=(10, 7))
ax.scatter(x_numbers, y)

# One label per data point, e.g. "28800 seconds".
xLabel = [str(int(num)) + ' seconds' for num in x_numbers]
# Fix the tick positions to the data points first. Without this the labels are
# attached to whatever ticks the automatic locator picked, so they neither match
# the points nor necessarily agree in number with the ticks.
ax.set_xticks(x_numbers)
# Rotate because several points are only minutes apart and would otherwise overlap.
ax.set_xticklabels(xLabel, rotation=90)
plt.tight_layout()
plt.show()
Something like this will work:
Edit: Changes made to make sure axis and subplot is used
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = {
    'A': ['08:00:00', '08:10:00', '08:12:00', '08:26:00', '08:29:00',
          '08:31:00', '10:10:00', '10:25:00', '10:29:00', '10:31:00'],
    'B': ['1', '1', '1', '2', '2', '2', '7', '7', '7', '7'],
    'C': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'A', 'X', 'Y', 'Z'],
}
df = pd.DataFrame(data=d)
fig, ax = plt.subplots()
x = df['A']
y = df['B']
# Numeric x positions: each time expressed as a number of seconds.
x_numbers = pd.to_timedelta(df['A']).dt.total_seconds()
ax.scatter(x_numbers, y)

# Work on the Axes object directly so the right subplot is targeted no matter
# which axes pyplot currently considers "current".
# Ticks go exactly on the data points and are labelled with the matching
# timestamp; labelling the automatically chosen tick locations with these
# strings would pair each timestamp with an unrelated position (and the counts
# generally differ).
ax.set_xticks(x_numbers)
ax.set_xticklabels([str(a) for a in x])
plt.show()
I have a scatter plot that has time on the x-axis
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = ({
'A' : ['08:00:00','08:10:00','08:12:00','08:26:00','08:29:00','08:31:00','10:10:00','10:25:00','10:29:00','10:31:00'],
'B' : ['1','1','1','2','2','2','7','7','7','7'],
'C' : ['X','Y','Z','X','Y','Z','A','X','Y','Z'],
})
df = pd.DataFrame(data=d)
fig,ax = plt.subplots()
x = df['A']
y = df['B']
# Convert the time strings to a number of seconds; these are the plotted x positions.
x_numbers = (pd.to_timedelta(df['A']).dt.total_seconds())
# At this point the x-axis shows total seconds, not timestamps.
plt.scatter(x_numbers, y)
plt.show()
Output 1:
I wanted to swap total seconds for actual timestamps so I included:
plt.xticks(x_numbers, x)
This results in the x-ticks overlapping each other.
If I use:
plt.locator_params(axis='x', nbins=10)
The result is the same as above. If I change nbins to something smaller, the ticks don't overlap, but they don't align with their respective scatter points; that is, the scatter points don't line up with the correct timestamps.
If I use:
M = 10
xticks = ticker.MaxNLocator(M)
ax.xaxis.set_major_locator(xticks)
The ticks don't overlap, but they don't align with their respective scatter points.
Is it possible to pick the number of x-ticks to use while keeping each tick aligned with its respective data point?
E.g. For the figure below. Can I just use n number of ticks instead of all of them?
Output 2:
Let's use some xticklabel manipulations:
# Sample data: 'A' holds clock times as 'HH:MM:SS' strings.
d = {
    'A': ['08:00:00', '08:10:00', '08:12:00', '08:26:00', '08:29:00',
          '08:31:00', '10:10:00', '10:25:00', '10:29:00', '10:31:00'],
    'B': ['1', '1', '1', '2', '2', '2', '7', '7', '7', '7'],
    'C': ['X', 'Y', 'Z', 'X', 'Y', 'Z', 'A', 'X', 'Y', 'Z'],
}
df = pd.DataFrame(d)
fig, ax = plt.subplots()
x = df['A']
y = df['B']
# Numeric x positions: each time expressed as a number of seconds.
x_numbers = pd.to_timedelta(df['A']).dt.total_seconds()
plt.scatter(x_numbers, y)

# Keep the tick positions matplotlib already chose and only swap their text:
# each position (in seconds) is parsed as a Timedelta, and the third
# whitespace-separated token of its string form is used as the label.
loc, labels = plt.xticks()
newlabels = []
for tick_seconds in loc:
    as_text = str(pd.Timedelta(str(tick_seconds) + ' seconds'))
    newlabels.append(as_text.split()[2])
plt.xticks(loc, newlabels)
plt.show()
Output:
Firstly, the time interval is not consistent.
Secondly, it's a high-frequency series.
In the general case, you won't be required to match the xticks to each individual entry. In those scenarios, you can use something like plt.plot_date(x, y) along with tick locators and formatters such as DayLocator() and DateFormatter('%Y-%m-%d').
For this very specific case, though, where the data is at minute level and a few points are really close together, a hack is to play with the numeric Series used for the x-axis, x_numbers. To increase the gap between two points I tried cumsum(), and to reduce the overlapping to an extent I gave the xticks some rotation.
fig, ax = plt.subplots(figsize=(10, 6))
x = df['A']
y = df['B']

# Seconds for each entry, then a running total: the cumulative sum spreads the
# points apart on the x-axis so neighbouring tick labels collide less.
elapsed_seconds = pd.to_timedelta(x).dt.total_seconds()
x_numbers = elapsed_seconds.cumsum()

plt.scatter(x_numbers, y)
# One tick per point, labelled with the original time string and rotated.
plt.xticks(x_numbers, x, rotation=50)
plt.show()
I have many data frames that I am plotting for a presentation. These all have different columns, but all contain the same additional column foobar. At the moment, I am plotting these different data frames using
df.plot(secondary_y='foobar')
Unfortunately, since these data frames all have different additional columns in different orders, the color of foobar is always different. This makes the presentation slides unnecessarily complicated. I would like to ensure that, throughout the different plots, foobar is plotted bold and black.
Looking at the docs, the only thing coming close appears to be the parameter colormap - I would need to ensure that the xth color in the color map is always black, where x is the order of foobar in the data frame. Seems to be more complicated than it should be, also this wouldn't make it bold.
Is there a (better) approach?
I would suggest using matplotlib directly rather than the dataframe plotting methods. If df.plot returned the artists it added instead of an Axes object it wouldn't be too bad to change the color of the line after it was plotted.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def pandas_plot(ax, df, callout_key):
    """
    Plot every column of ``df`` on ``ax``, highlighting one column.

    Parameters
    ----------
    ax : mpl.Axes
        The axes to draw to
    df : DataFrame
        Data to plot; the index supplies the x values
    callout_key : str
        key to highlight (drawn in black with line width 2)

    Returns
    -------
    dict
        Maps each column key to the line artist returned by ``ax.plot``.
    """
    artists = {}
    x = df.index.values
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    for k, v in df.items():
        # Label each line with its column key so ax.legend() has entries to show.
        style_kwargs = {'label': k}
        if k == callout_key:
            style_kwargs['c'] = 'k'
            style_kwargs['lw'] = 2
        ln, = ax.plot(x, v.values, **style_kwargs)
        artists[k] = ln
    ax.legend()
    # np.min/np.max raise on an empty array; leave the limits alone in that case.
    if x.size:
        ax.set_xlim(np.min(x), np.max(x))
    return artists
Usage:
# Two demo frames sharing the same index; the second mirrors 'cos' and 'sin'.
th = np.linspace(0, 2*np.pi, 1024)
primary_columns = {'cos': np.cos(th), 'sin': np.sin(th),
                   'foo': np.sin(th + 1), 'bar': np.cos(th +1)}
df = pd.DataFrame(primary_columns, index=th)
df2 = pd.DataFrame({'cos': -np.cos(th), 'sin': -np.sin(th)}, index=th)

# Primary axes plus a twin sharing the x-axis; 'sin' is the highlighted column on both.
fig, ax = plt.subplots()
ax2 = ax.twinx()
pandas_plot(ax, df, 'sin')
pandas_plot(ax2, df2, 'sin')
Perhaps you could define a function which handles the special column in a separate plot call:
def emphasize_plot(ax, df, col, **emphargs):
    """Plot ``df`` on ``ax``, drawing column ``col`` in a separate, styled call.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes that receives all lines.
    df : DataFrame
        Data to plot.
    col : str
        Column to emphasize; it is plotted last with ``emphargs``.
    **emphargs
        Extra keyword arguments forwarded to the emphasized column's ``plot``
        call (for example ``lw=2, c='k'``).
    """
    columns = [c for c in df.columns if c != col]
    # Plotting an empty column selection raises, so skip it when `col` is the
    # only column in the frame.
    if columns:
        df[columns].plot(ax=ax)
    df[col].plot(ax=ax, **emphargs)
    # Rebuild the legend so the emphasized column, added after the DataFrame
    # plot drew its legend, is listed as well.
    ax.legend()
Using code from tcaswell's example,
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def emphasize_plot(ax, df, col, **emphargs):
    """Plot ``df`` on ``ax``, drawing column ``col`` in a separate, styled call.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes that receives all lines.
    df : DataFrame
        Data to plot.
    col : str
        Column to emphasize; it is plotted last with ``emphargs``.
    **emphargs
        Extra keyword arguments forwarded to the emphasized column's ``plot``
        call (for example ``lw=2, c='k'``).
    """
    columns = [c for c in df.columns if c != col]
    # Plotting an empty column selection raises, so skip it when `col` is the
    # only column in the frame.
    if columns:
        df[columns].plot(ax=ax)
    df[col].plot(ax=ax, **emphargs)
    # Rebuild the legend so the emphasized column, added after the DataFrame
    # plot drew its legend, is listed as well.
    ax.legend()
# Two demo frames sharing the same index; both contain the 'foobar' column.
th = np.linspace(0, 2*np.pi, 1024)
df = pd.DataFrame({'cos': np.cos(th), 'foobar': np.sin(th),
                   'foo': np.sin(th + 1), 'bar': np.cos(th +1)}, index=th)
df2 = pd.DataFrame({'cos': -np.cos(th), 'foobar': -np.sin(th)}, index=th)

fig, ax = plt.subplots()
# Same emphasis (thick black line) for 'foobar' in every frame, all on one axes.
emphasis = {'lw': 2, 'c': 'k'}
for frame in (df, df2):
    emphasize_plot(ax, frame, 'foobar', **emphasis)
plt.show()
yields
I used #unutbut's answer and extended it to allow for a secondary y axis and correct legends:
def emphasize_plot(ax, df, col, **emphargs):
    """Plot ``df`` on ``ax`` with column ``col`` emphasized on a secondary y-axis.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes that receives every column except ``col``.
    df : DataFrame
        Data to plot.
    col : str
        Column to emphasize; drawn on a twin axes created from ``ax``.
    **emphargs
        Extra keyword arguments forwarded to the emphasized column's ``plot``
        call (for example ``lw=2, c='k'``).

    Returns
    -------
    matplotlib Axes
        The twin axes holding the emphasized column and the combined legend.
    """
    columns = [c for c in df.columns if c != col]
    ax2 = ax.twinx()
    # Plotting an empty column selection raises, so skip it when `col` is the
    # only column. legend=False stops pandas from drawing its own legend on
    # `ax`, which would duplicate the combined legend built below.
    if columns:
        df[columns].plot(ax=ax, legend=False)
    df[col].plot(ax=ax2, **emphargs)
    # Merge the handles of both axes into a single legend on the twin axes.
    lines, labels = ax.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc=0)
    return ax2