I am trying to use Python's matplotlib to plot a pandas DataFrame. The DataFrame has a 'time' column and a 'val' column. The 'time' column is set as the index and has a resolution of up to microseconds. When I plot it, the values on the x-axis are totally off (way outside the time range of the data). What could be wrong? Any help is appreciated.
Below is the code:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates
df = pd.read_csv("/tmp/a.csv")
df = df.set_index('time')
def plot1(df):
    """Plot the 'val' column of ``df`` against its index and return the Axes."""
    ax = df.plot(y='val')
    # Turn off offset notation on the y-axis tick labels.
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    # Label the x ticks as HHMMSS.microseconds.
    ax.get_xaxis().set_major_formatter(matplotlib.dates.DateFormatter("%H%M%S.%f"))
    plt.show()
    return ax
plot1(df)
Data in '/tmp/a.csv':
time,val
143642.229348,12
143642.250195,53
143642.252341,17
143642.254349,56
143642.311674,31
143642.313758,36
143642.320217,24
143642.339777,86
You would need to convert your time column to datetime after reading it from the CSV file:
df['time'] = pd.to_datetime(df['time'], format="%H%M%S.%f")
alternatively you can do it on the fly when parsing your CSV file:
# Parse the 'time' column while the file is being read and use it as the index.
# `date_format` (pandas >= 2.0) replaces the deprecated `date_parser` callback;
# on older pandas pass date_parser=lambda x: pd.to_datetime(x, format=...) instead.
df = pd.read_csv('/tmp/a.csv',
                 sep=',',
                 parse_dates=['time'],
                 date_format="%H%M%S.%f",
                 index_col='time')
after that you don't need matplotlib.dates.DateFormatter:
In [147]: df.plot()
Out[147]: <matplotlib.axes._subplots.AxesSubplot at 0x8201f60>
Related
I created a pivot table and I want to create a bar graph from it. This is my pivot table:
I don't know how to extract the values of the column 1970 and use this information to make a bar graph.
Thanks!!
Just convert dataframe column names to str then you can select the data of year 1970 with df['1970']. Then, you can use pandas built-in plot.bar method to make a bar plot. Try this:
import pandas as pd
import matplotlib.pyplot as plt
#converting column names to string
df.columns = df.columns.astype(str)
#plotting a bar plot
df['1970'].plot.bar()
plt.show()
Example based on @AlanDyke's DataFrame:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame([[1970,'a',1],
[1970,'b',2],
[1971,'a',2],
[1971,'b',3]],
columns=['year','location', 'value'])
df = pd.pivot_table(df, values='value', index='location', columns='year')
df.columns = df.columns.astype(str)
df['1970'].plot.bar()
plt.show()
you can use plt.bar and slice the dataframe:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame([[1970, 'a', 1],
                   [1970, 'b', 2],
                   [1971, 'a', 2],
                   [1971, 'b', 3]],
                  columns=['year', 'location', 'value'])
df = pd.pivot_table(df, values='value', index='location', columns='year')
# One bar per location (the pivot's index); the 1970 column gives the bar heights.
plt.bar(df.index, height=df[1970])
plt.show()
I would like to make a heatmap from a pandas DataFrame (or Series) with DateTimeIndex so that I have hours on the x-axis and days on the y-axis, both ticklabels displayed in DateTimeIndex style.
If I do the following:
import pandas as pd
import numpy as np
import seaborn as sns
df = pd.DataFrame(np.random.randint(10, size=4*24*200))
df.index = pd.date_range(start='2019-02-01 11:30:00', periods=200*24*4, freq='15min')
df['minute'] = df.index.hour*60 + df.index.minute
df['dayofyear'] = df.index.month + df.index.dayofyear
df = df.pivot(index='dayofyear', columns='minute', values=df.columns[0])
sns.heatmap(df)
The index obviously loses the DateTime format:
What I instead want is something like this (which I achieved with a complicated, not generalizable function that apparently doesn't even work properly):
Does someone know a neat way to create this kind of heatmap with python?
EDIT:
The function I created:
def plot_heatmap(df_in, plot_column=0, figsize=(20,12), vmin=None, vmax=None,
                 cmap='jet', xlabel='hour (UTC)', ylabel='day', rotation=0,
                 freq='5s'):
    '''
    Plots heatmap with date labels (days on the y-axis, time of day on the x-axis)

    df_in: pandas DataFrame or pandas Series with a DatetimeIndex
    plot_column: position of the column to plot if DataFrame has multiple columns
    figsize: figure size passed to plt.figure
    vmin, vmax: colour scale limits passed to sns.heatmap
    cmap: colormap passed to sns.heatmap
    xlabel, ylabel: axis labels
    rotation: rotation of the y tick labels
    freq: frequency of the regular time grid the data is reindexed onto
    '''
    # convert to DataFrame in case a Series is passed
    # (objects without to_frame are used as they are):
    try:
        df_in = df_in.to_frame()
    except AttributeError:
        pass

    # make copy in order not to overwrite input (in case input is an object attribute)
    df = df_in.copy()

    # pad missing dates:
    idx = pd.date_range(df_in.index[0], df_in.index[-1], freq=freq)
    df = df.reindex(idx, fill_value=np.nan)

    # 'hour' holds the second of the day, i.e. the column key of the mesh
    df['hour'] = df.index.hour*3600 + df.index.minute*60 + df.index.second
    # NOTE(review): month + dayofyear is a plain integer key that jumps by 2 at
    # every month boundary, so the date information is lost and day_delta below
    # is not a true day count; df.index.date would keep real dates.
    df['dayofyear'] = df.index.month + df.index.dayofyear

    # Create mesh for heatmap plotting:
    pivot = df.pivot(index='dayofyear', columns='hour', values=df.columns[plot_column])

    # plot
    plt.figure(figsize=figsize)
    sns.heatmap(pivot, cmap=cmap, vmin=vmin, vmax=vmax)

    # set xticks: 25 evenly spaced ticks labelled 0..24
    plt.xticks(np.linspace(0,pivot.shape[1],25), labels=range(25))
    plt.xlabel(xlabel)

    # set yticks
    ylabels = []
    ypositions = []
    day0 = df['dayofyear'].unique().min()
    for day in df['dayofyear'].unique():
        day_delta = day-day0
        # create pandas Timestamp
        temp_tick = df.index[0] + pd.Timedelta('%sD' %day_delta)
        # check whether tick shall be shown or not (1st and 15th of a month only)
        if temp_tick.day==1 or temp_tick.day==15:
            temp_tick_nice = '%s-%s-%s' %(temp_tick.year, temp_tick.month, temp_tick.day)
            ylabels.append(temp_tick_nice)
            ypositions.append(day_delta)
    plt.yticks(ticks=ypositions, labels=ylabels, rotation=rotation)
    plt.ylabel(ylabel)
The date format goes away because you did:
df['dayofyear'] = df.index.month + df.index.dayofyear
Here, both series are integers, so df['dayofyear'] is also integer-typed.
Instead, do:
df['dayofyear'] = df.index.date
Then you get as output:
The best solution I have found so far, which also works if the frequency of the DatetimeIndex is below 1 min, is the following:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt  # needed for plt.yticks below

freq = '30s'
df = pd.DataFrame(np.random.randint(10, size=4*24*200*20))
df.index = pd.date_range(start='2019-02-01 11:30:00', periods=200*24*4*20, freq=freq)
# Time of day becomes the column key, calendar date the row key.
df['hour'] = df.index.strftime('%H:%M:%S')
df['dayofyear'] = df.index.date
df = df.pivot(index='dayofyear', columns='hour', values=df.columns[0])
# Shorten the labels that end up on the axes.
df.columns = pd.DatetimeIndex(df.columns).strftime('%H:%M')
df.index = pd.DatetimeIndex(df.index).strftime('%m/%Y')
# Number of columns covered by two hours at the chosen frequency.
xticks_spacing = int(pd.Timedelta('2h')/pd.Timedelta(freq))
ax = sns.heatmap(df, xticklabels=xticks_spacing, yticklabels=30)
plt.yticks(rotation=0)
Which produces this result:
The only remaining flaw is that the positions of the month ticks are not precisely defined with this method...
I'm working with a DataFrame. My data is used for a candlestick chart.
The problem is I can't remove the weekend dates. I mean, my code shows this:
enter image description here
And I'm looking for this:
enter image description here
Here is my code:
import matplotlib.dates as mdates  # provides date2num and DateFormatter used below
import matplotlib.ticker as ticker
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_finance import candlestick_ohlc

df = pd.read_csv('AAPL.csv')
df['Date'] = pd.to_datetime(df['Date'])
# Convert the timestamps to Matplotlib date numbers.
df["Date"] = df["Date"].apply(mdates.date2num)
dates = df['Date'].tolist()
ohlc = df[['Date', 'Open', 'High', 'Low','Close']]
f1, ax = plt.subplots(figsize = (12,6))
candlestick_ohlc(ax, ohlc.values, width=.5, colorup='green', colordown='red')
# One major tick per day, labelled as YYYY-MM-DD.
ax.xaxis.set_major_locator(ticker.MultipleLocator(1.0))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.setp(ax.get_xticklabels(), rotation=70, fontsize=7)
# Overlay the closing prices as a line with markers.
close = df['Close'].values
plt.plot(dates,close, marker='o')
plt.show()
Dataframe:
Date,Open,High,Low,Close,Adj Close,Volume
2019-02-04,167.410004,171.660004,167.279999,171.250000,170.518677,31495500
2019-02-05,172.860001,175.080002,172.350006,174.179993,173.436157,36101600
2019-02-06,174.649994,175.570007,172.850006,174.240005,173.495911,28239600
2019-02-07,172.399994,173.940002,170.339996,170.940002,170.210007,31741700
2019-02-08,168.990005,170.660004,168.419998,170.410004,170.410004,23820000
2019-02-11,171.050003,171.210007,169.250000,169.429993,169.429993,20993400
2019-02-12,170.100006,171.000000,169.699997,170.889999,170.889999,22283500
2019-02-13,171.389999,172.479996,169.919998,170.179993,170.179993,22490200
2019-02-14,169.710007,171.259995,169.380005,170.800003,170.800003,21835700
2019-02-15,171.250000,171.699997,169.750000,170.419998,170.419998,24626800
2019-02-19,169.710007,171.440002,169.490005,170.929993,170.929993,18972800
2019-02-20,171.190002,173.320007,170.990005,172.029999,172.029999,26114400
2019-02-21,171.800003,172.369995,170.300003,171.059998,171.059998,17249700
2019-02-22,171.580002,173.000000,171.380005,172.970001,172.970001,18913200
2019-02-25,174.160004,175.869995,173.949997,174.229996,174.229996,21873400
2019-02-26,173.710007,175.300003,173.169998,174.330002,174.330002,17070200
2019-02-27,173.210007,175.000000,172.729996,174.869995,174.869995,27835400
2019-02-28,174.320007,174.910004,172.919998,173.149994,173.149994,28215400
This is not a complete solution, but I can suggest something.
Just use
import mplfinance as mpf
mpf.plot(df, type='candle')
This automatically ignores non-trading days in the plot, which made me a little happier, although I am not fully satisfied with it. I hope this helps.
Check this out.
https://github.com/matplotlib/mplfinance#basic-usage
You can slice it from the dataframe before processing
please check this link Remove non-business days rows from pandas dataframe
Do not use date/time as your index; use a candle number as the index instead.
Then your data becomes continuous and there are no interruptions in the time series.
So use the candle number as the index, and plot the data against it rather than against date/time.
If you want to plot against date/time, you need a column that holds the timestamp of each candle and must plot against that column, but then you will have gaps again.
Try to filter your dataframe.
df = df[df.Open.notnull()]
Add this to your plot.
show_nontrading=False
I looked at the responses to the original question (see here), but they don't seem to solve my issue.
import pandas as pd
import pandas_datareader.data
import datetime
import matplotlib.pyplot as plt
# Name of the price column, defined once so that read_csv and the plot agree.
price_col = 'Cushing OK Crude Oil Future Contract 1 Dollars per Barrel'
df = pd.read_csv(mypath + filename,
                 skiprows=4, index_col=0, usecols=['Day', price_col],
                 skipfooter=0, engine='python')
df.index = pd.to_datetime(df.index)
fig = plt.figure(figsize=plt.figaspect(0.25))
ax = fig.add_subplot(1,1,1)
ax.grid(axis='y',color='lightgrey', linestyle='--', linewidth=0.5)
ax.grid(axis='x',color='lightgrey', linestyle='none', linewidth=0.5)
df[price_col].plot(ax=ax, grid=True,
                   color='blue', fontsize=14, legend=False)
plt.show()
The graph turns out fine but I can't figure out a way to show only a certain date range. I have tried everything.
type(df) = pandas.core.frame.DataFrame
type(df.index) = pandas.core.indexes.datetimes.DatetimeIndex
also, the format for the column 'Day' is YYYY-MM-DD
Assuming you have a datetime index on your dataframe (it looks that way), you can slice using .loc like so:
% matplotlib inline
import pandas as pd
import numpy as np
data = pd.DataFrame({'values': np.random.rand(31)}, index = pd.date_range('2018-01-01', '2018-01-31'))
# Plot the entire dataframe.
data.plot()
# Plot a slice of the dataframe.
data.loc['2018-01-05':'2018-01-10', 'values'].plot(legend = False)
Gives:
The orange series is the slice.
enter image description here
I want to plot the close price (y-axis) against the date (x-axis) with Python, but the error says that I need to convert the date from string to float.
Here is the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import datetime
from pandas import DataFrame, Series
df = pd.read_csv('C:/Users/Vicky/Desktop/pythontest/T1706dailyrecord.csv')
df.columns = [1,2,3,4,5]
print(df)
plt.plot(df[1], df[3])
I think you need the parse_dates parameter of read_csv to convert the column to datetime:
df = pd.read_csv('C:/Users/Vicky/Desktop/pythontest/T1706dailyrecord.csv', parse_dates=[0])
Or:
df=pd.read_csv('C:/Users/Vicky/Desktop/pythontest/T1706dailyrecord.csv',parse_dates=['Date'])
Also, df.columns = [1,2,3,4,5] is not necessary; to select the columns, use df['Date'] and df['Close']:
plt.plot(df['Date'], df['Close'])
It is also possible to use DataFrame.plot:
df.plot(x='Date', y='Close')