Python: Need help creating an Intraday 1m tick OHLC Chart - python

I am pretty much brand new to all things python, and much to my chagrin I have been trying to produce a fairly straight forward OHLC chart. Code below with dataframe samples.
I am trying to plot and save an OHLC chart, for a single stock, on a single trading day, in 1m ticks. The yaxis appears to be working fine, however the chart when shown is blank. The xaxis is showing the starting time of 09:30 but with no other 1m ticks. Moving the mouse over the blank figure shows values for the yaxis but the x= nada.
Example
What I am hoping to eventually achieve, is the xaxis label to show the time, in minutes, no date required, 90 degrees rotated, at say 15min intervals. I would rather an OHLC chart than a candlestick, but I also want it to be decipherable, as I have seen many versions that are just a blur of tiny vertical lines that are no use to anyone. If the size needs to be stretched horizontally in order to fit the 376 or so 1m records in the dataframe, then so be it. If it is too cluttered then I would like to be able to space out the tick interval perhaps to every 2 or 5 mins. The xaxis xticks should still remain at 15min intervals however. I would like to then save the result as a jpg.
I have tried so many variations of mplfinance that I no longer know which is the most recent or valid module. I have tried both 'quotes' and values in the candlestick_ohlc statement, there seems to be no apparent difference. I have read and re-read and tried so many examples but all seem to fail at the translation of the time in all things to do with the xaxis and it is very confusing for me to understand and beyond frustrating .. heh.
If anyone could kindly point me in the right direction here I would be very grateful for any and all assistance.
Many thanks, Tim.D
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mplfinance.original_flavor import candlestick_ohlc
import sys  # required for sys.argv; missing from the import list above
import pyodbc  # database connectivity

sym = sys.argv[1]  # symbol in all caps
run_dt = sys.argv[2]  # run date as passed on the command line (surrounded by 'quotes')
run_int = run_dt.replace('/', '-').replace("'", "")  # date with '/' -> '-' and quotes removed

cnxn = pyodbc.connect(dsn='abc', user='abc', password='abc', autocommit=False)

# The command-line values are bound as query parameters instead of being
# concatenated into the SQL text.
# NOTE(review): the original spliced run_dt in together with its quotes; they are
# stripped here because bound values are not quoted by hand -- confirm the date
# format the database expects.
query = """
    SELECT TIMESTAMP(ACT_DATE||' '||TIME(TICK)) AS TIME, OPEN, HIGH, LOW, CLOSE
    FROM INTRADAY_IDX
    WHERE ACT_DATE = ?
    AND SYMBOL = ?
    ORDER BY 1
"""
df = pd.read_sql_query(query, cnxn, params=[run_dt.strip("'"), sym])
print(df)
This produces a dataframe as follows:
TIME OPEN HIGH LOW CLOSE
0 2021-02-12 09:30:00 314.27 314.50 314.22 314.49
1 2021-02-12 09:31:00 314.51 314.73 314.44 314.63
2 2021-02-12 09:32:00 314.63 314.79 314.54 314.73
.. ... ... ... ... ...
375 2021-02-12 15:59:00 315.01 315.14 314.85 315.00
376 2021-02-12 16:00:00 315.00 315.18 314.97 315.18
# Replace the TIME column with matplotlib date numbers so it can be charted.
time_as_datetimes = df.TIME.dt.to_pydatetime()
df.TIME = mdates.date2num(time_as_datetimes)
print(df.head(5))
TIME OPEN HIGH LOW CLOSE
0 737833.395833 314.27 314.50 314.22 314.49
1 737833.396528 314.51 314.73 314.44 314.63
2 737833.397222 314.63 314.79 314.54 314.73
3 737833.397917 314.83 314.89 314.76 314.85
...
#quotes = [tuple(x) for x in df[['TIME', 'OPEN', 'HIGH', 'LOW', 'CLOSE']].to_records(index=False)]
#print(quotes)
fig, ax = plt.subplots(figsize=(12,7))
plt.yscale('linear') #default scaling of the y axis
# NOTE(review): TIME was converted to matplotlib date numbers earlier, so string
# limits and an 1800-unit tick step presumably do not match the plotted data --
# this is the suspected cause of the blank chart; confirm.
ax.set_xlim('09:30', '16:00') #sets the start and end values for the xaxis charting
start, end = ax.get_xlim() #initializes the start and end variables
ax.xaxis.set_ticks(np.arange(start, end, 1800)) #sets the tick values for charting
plt.xticks(rotation=90, fontsize=12) #sets the rotation value of the x axis ticks
plt.yticks(fontsize=12)
ax.set_title(sym+' OHLC Intraday Chart', fontsize=14, fontweight = 'bold')
ax.set_ylabel('Price', fontsize=12, fontweight = 'bold')
ax.set_xlabel('Time', fontsize=12, fontweight = 'bold')
plt.tight_layout() #reduces the space padding surrounding the graph
ax.grid(True)
candlestick_ohlc(ax, df.values, width = 1/(24*60*2.5), alpha = 1.0, colorup = 'g', colordown ='r')
#candlestick_ohlc(ax, quotes, width = 1/(24*60*2.5), alpha = 1.0, colorup = 'g', colordown ='r')
# The two disabled lines below were scrambled across lines in the snippet, leaving
# a dangling "bbox_inches='tight')" that would not parse; they are restored here.
#plt.savefig('c:\\temp\\charts\\'+sym+'_OHLC_'+run_int+'.jpg', format='jpg', quality=95, bbox_inches='tight') #saves the data to to jpg file
#plt.close()
plt.show()

Thanks so much for the response. Using your code I have managed to get it working now, also adding a secondary plot. Code below:
import sys, os, time, warnings #csv
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
#import numpy as np
#from datetime import datetime, date, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
#from matplotlib import dates, ticker
from mplfinance.original_flavor import candlestick_ohlc
import pyodbc  # database connectivity

sym = sys.argv[1]  # symbol in all caps
run_dt = sys.argv[2]  # run date as passed on the command line (surrounded by 'quotes')
run_int = run_dt.replace('/', '-').replace("'", "")  # reformat the date: '/' -> '-' and quotes removed

cnxn = pyodbc.connect(dsn='abc', user='abc', password='abc', autocommit=False)

# The command-line values are bound as query parameters instead of being
# concatenated into the SQL text.
# NOTE(review): the original spliced run_dt in together with its quotes; they are
# stripped here because bound values are not quoted by hand -- confirm the date
# format the database expects.
query = """
    SELECT timestamp(ACT_DATE||' '||TIME(TICK)) AS TIME, OPEN, HIGH, LOW, CLOSE
    FROM SQ4_INTRADAY_IDX
    WHERE ACT_DATE = ?
    AND SYMBOL = ?
    ORDER BY 1
"""
db = pd.read_sql_query(query, cnxn, params=[run_dt.strip("'"), sym])
print(db)
# Index the frame by timestamp so the close series plots against time.
db['TIME'] = pd.to_datetime(db['TIME'])
db.set_index('TIME', inplace=True)

# candlestick_ohlc is fed a plain array whose first column holds matplotlib date
# numbers, so build that from a copy and leave db's datetime index untouched.
dd = db.copy()
dd.index = mdates.date2num(dd.index)
dd_data = dd.reset_index().values

clse = db["CLOSE"]  # close prices for the additional line overlay

fig, ax = plt.subplots(figsize=(12,7))
ax.set_title(sym+' OHLC Intraday Chart', fontsize=14, fontweight='bold')
ax.set_ylabel('Price', fontsize=12, fontweight='bold')
ax.set_xlabel('Time', fontsize=12, fontweight='bold')
candlestick_ohlc(ax, dd_data, width=.0003, alpha=.8, colorup='g', colordown='r')
ax.plot(clse, color = 'k', linestyle='--', linewidth = .5, label='Close')
plt.xticks(rotation=90, fontsize=12)  # rotate and size the x axis tick labels
plt.yticks(fontsize=12)  # size the y axis tick labels
ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=30))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
plt.tight_layout()  # reduces the space padding surrounding the graph
# JPEG quality goes through pil_kwargs; the bare `quality` keyword of savefig
# has been removed from current matplotlib releases.
plt.savefig('c:\\temp\\'+sym+'_OHLC Intrday Chart for '+run_int+'.jpg', format='jpg', pil_kwargs={'quality': 95}, bbox_inches='tight')
plt.show()
This produces the attached chart.
My issue is that I am trying to remove the padded space between the left and right y-axis scales. In other words I would like the 9:30 label to appear directly under the left and 16:00 under the right margins. Basically I guess I am trying to stretch the chart to fill the entire chart box.
Also is there anyway to add the left Price scale values to both the left and right sides ?
Thanks for assist, much appreciated.
Regards, Tim.D

The argument of this function must be an array. Also, the date and time must be converted to numbers with mdates.date2num(). Beyond that, the display of the date and time is controlled using a locator and a formatter. I think the ax.set_xlim('09:30', '16:00') call in your code is the cause of the error. The data acquisition is from Yahoo Finance.
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mplfinance.original_flavor import candlestick_ohlc
import yfinance as yf
sym = 'DIA'
dia = yf.download(sym, period='1d', interval='1m', start="2021-02-11", end='2021-02-12')

# candlestick_ohlc takes an array whose first column is a matplotlib date number.
df = dia.copy()
df.index = mdates.date2num(df.index)
data = df.reset_index().values

fig, ax = plt.subplots(figsize=(12,7))
candlestick_ohlc(ax, data, width=1/(24*60*2.5), alpha=1.0, colorup='g', colordown='r')
ax.set_title(sym+' OHLC Intraday Chart', fontsize=14, fontweight='bold')
ax.set_ylabel('Price', fontsize=12, fontweight='bold')
ax.set_xlabel('Time', fontsize=12, fontweight='bold')
# update start
# Pin the x range to the first and last bars so there is no padding at either
# side; the last row is addressed as -1 so any number of downloaded bars works.
ax.set_xlim(data[0][0], data[-1][0])
# Mirror the price scale on the right-hand side with a twin axis.
ax1 = ax.twinx()
ax1.set_yticks(ax.get_yticks())
ax1.set_ybound(ax.get_ybound())
ax1.set_yticklabels([str(x) for x in ax.get_yticks()])
# update end
ax.grid()
locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(locator))
plt.show()

Related

How can I adjust the bounds of the x tick values that are automatically chosen by matplotlib?

I have a graph that shows the closing price of a stock throughout a day at each five minute interval. The x axis shows the time and the range of x values is from 9:30 to 4:00 (16:00).
The problem is that the automatic bounds for the x axis go from 9:37 to 16:07 and I really just want it from 9:30 to 16:00.
The code I am currently running is this:
stk = yf.Ticker(ticker)
his = stk.history(interval="5m", start=start, end=end).values.tolist() #open - high - low - close - volume
five_minutes = datetime.timedelta(minutes = 5)

# x is synthesised as start + 5 min per bar (not read from the data's own
# timestamps); y is the close, the fourth value of each bar.
x = []
y = []
for count, bar in enumerate(his):
    x.append(start + five_minutes * count)#.strftime("%H:%M"))
    y.append(bar[3])

plt.clf()
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
plt.gca().xaxis.set_major_locator(mdates.MinuteLocator(interval=30))
plt.plot(x, y)
plt.gcf().autofmt_xdate()
plt.show()
And it produces this plot (currently a link because I am on a new user account):
I thought I was supposed to use the axis.set_data_interval function providing, so I did so by providing datetime objects representing 9:30 and 16:00 as the min and the max. This gave me the error:
TypeError: '<' not supported between instances of 'float' and 'datetime.datetime'
Is there another way for me to be able to adjust the first xtick and still have it automatically fill in the rest?
This problem can be fixed by adjusting the way you use the mdates tick locator. Here is an example based on the one shared by r-beginners to make it comparable. Note that I use the pandas plotting function for convenience. The x_compat=True argument is needed for it to work with mdates:
import pandas as pd # 1.1.3
import yfinance as yf # 0.1.54
import matplotlib.dates as mdates # 3.3.2
# Import data
# Fetch today's 5-minute bars for the chosen symbol
ticker = 'AAPL'
stk = yf.Ticker(ticker)
his = stk.history(period='1D', interval='5m')

# Draw the close with pandas; x_compat=True keeps the axis usable with mdates
ax = his.plot(y='Close', x_compat=True, figsize=(10,5))

# Ticks on the hour and half hour, labelled HH:MM in the data's own time zone
half_hour_locator = mdates.MinuteLocator(byminute=[0, 30])
hour_minute_format = mdates.DateFormatter('%H:%M', tz=his.index.tz)
ax.xaxis.set_major_locator(half_hour_locator)
ax.xaxis.set_major_formatter(hour_minute_format)

ax.legend(frameon=False)
ax.figure.autofmt_xdate(rotation=0, ha='center')
The sample data was created by obtaining Apple's stock price from Yahoo Finance. The desired five-minute interval labels are a list of strings obtained by using the date function to get the start and end times at five-minute intervals.
Based on this, the x-axis is drawn as a graph of the number of five-minute intervals and the closing price, and the x-axis is set to any interval by slicing.
import yfinance as yf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
ticker = 'AAPL'
stk = yf.Ticker(ticker)
his = stk.history(period='1D',interval="5m")
his.reset_index(inplace=True)

# HH:MM strings for every five-minute slot from 09:30 to 15:55
time_rng = pd.date_range('09:30','15:55', freq='5min')
labels = [t.strftime('%H:%M') for t in time_rng]

# Plot the close against the bar position rather than against a datetime
x = np.arange(len(his))
y = his.Close
fig, ax = plt.subplots()
ax.plot(x,y)

# Show every third position, labelled with the matching time string
ax.set_xticks(x[::3])
ax.set_xticklabels(labels[::3], rotation=45)
plt.show()

Why am I getting junk date values on x-axis in matplotlib?

I am new to Python and learning data visualization using matplotlib.
I am trying to plot Date/Time vs Values using matplotlib from this CSV file:
https://drive.google.com/file/d/1ex2sElpsXhxfKXA4ZbFk30aBrmb6-Y3I/view?usp=sharing
Following is the code snippet which I have been playing around with:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
# Apply the 'seaborn' style sheet to all following plots.
plt.style.use('seaborn')
# Tick locators at several granularities; only `minutes` and `hours` are used below.
years = mdates.YearLocator()
months = mdates.MonthLocator()
days = mdates.DayLocator()
hours = mdates.HourLocator()
minutes = mdates.MinuteLocator()
# Despite the name, this formatter renders hour:minute labels.
years_fmt = mdates.DateFormatter('%H:%M')
# No date parsing is requested in read_csv -- presumably 'Date/Time' is still
# text when it is sorted and plotted below; verify against the CSV.
data = pd.read_csv('datafile.csv')
data.sort_values('Date/Time', inplace=True)
fig, ax = plt.subplots()
# Column names are resolved against the `data` frame.
ax.plot('Date/Time', 'Discharge', data=data)
# format the ticks
ax.xaxis.set_major_locator(minutes)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(hours)
# Clamp the x range to the smallest and largest 'Date/Time' values.
datemin = min(data['Date/Time'])
datemax = max(data['Date/Time'])
ax.set_xlim(datemin, datemax)
# Formatters used for the cursor read-out of x and y.
ax.format_xdata = mdates.DateFormatter('%Y.%m.%d %H:%M')
ax.format_ydata = lambda x: '%1.2f' % x # format the price.
ax.grid(True)
fig.autofmt_xdate()
plt.show()
The code is plotting the graph but it is not labeling the X-Axis and also giving some unknown values (on mouse over) for x on the bottom right corner as shown in the below screenshot:
Screenshot of matplotlib figure window
Can someone please suggest what changes are needed to plot the x-axis dates and also make the correct values appear when I move the cursor over the graph?
Thanks
I haven't used matplotlib. Instead I used pandas plotting
import pandas as pd
data = pd.read_csv('datafile.csv')
# Parse before sorting: sorting the raw '%d.%m.%Y %H:%M' strings would order the
# rows by their text (day of month first), not chronologically.
data["Date/Time"] = pd.to_datetime(data["Date/Time"], format="%d.%m.%Y %H:%M")
data.sort_values('Date/Time', inplace=True)
ax = data.plot.line(x='Date/Time', y='Discharge')
Here, you need to convert the Date/Time to pandas datetime type.
The main issue you have there is that the date formats are mixed up - your data uses '%d.%m.%Y %H:%M', but you set '%Y.%m.%d %H:%M' and this is why you saw 'rubbish' values in x ticks labels. Anyway the number of lines in your code can be reduced heavily if you convert your Date/Time column to timestamps, ie.:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
plt.style.use('seaborn')
data = pd.read_csv('datafile.csv')
# Convert the text column to timestamps, then sort once on the parsed values;
# sorting the raw strings beforehand had no effect on the final order.
data["Date/Time"] = pd.to_datetime(data["Date/Time"], format="%d.%m.%Y %H:%M")
data.sort_values('Date/Time', inplace=True)
fig, ax = plt.subplots()
ax.plot('Date/Time', 'Discharge', data=data)
# Formatter used for the cursor read-out of x.
ax.format_xdata = mdates.DateFormatter('%Y.%m.%d %H:%M')
ax.tick_params(axis='x', rotation=45)
ax.grid(True)
fig.autofmt_xdate()
plt.show()
Note that the format of labels in the plot will depend on the zoom level, so you will need to enlarge a portion of the graph to see hours and minutes in the tick labels, but the cursor locator on the bottom bar of the window should be always displaying the detailed timestamp under the cursor.

Measurement length for X and Y-axis

I wonder if it's possible to change the measurement milestones for graphs created by pandas. In my code the X-axis stands for time and is measured by month, but the measurement milestones are all over the place.
In the image below, the milestones for the X-axis are 2012M01, 2012M06, 2012M11, 2013M04 and 2013M09.
Is there any way I can choose how long the distance should be between every milestone? For example, to make it so it shows every year or every half year?
This is the code I used for the function making the graph:
def graph(dataframe):
    """Plot the "Profit" column of *dataframe* as a line chart and show it.

    The x axis comes from the frame's index; tick placement is left to the
    pandas/matplotlib defaults.
    """
    # Named `ax` so the local does not shadow this function's own name.
    ax = dataframe[["Profit"]].plot()
    ax.set_title('Statistics')
    ax.set_ylabel('Thousand $')
    ax.set_xlabel('Time')
    plt.grid(True)
    plt.show()
The actual dataframe is just an excel-file with a bunch of months and monetary values in it.
I think the most straight forward is to use matplotlib.dates to format the axis:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
def graph(dataframe):
    """Plot the "Profit" column of *dataframe* with ticks on January and July.

    The plot is drawn by pandas onto an explicitly created axes, then the
    major x ticks are placed and labelled with matplotlib.dates.
    """
    fig, ax = plt.subplots()
    xfmt = mdates.DateFormatter('%YM%m')  # see https://strftime.org/
    major = mdates.MonthLocator([1,7])  # label only Jan and Jul
    # Named `plot_ax` so the local does not shadow this function's own name.
    plot_ax = dataframe[["Profit"]].plot(ax=ax)  # link plot to the existing axes
    plot_ax.set_title('Statistics')
    plot_ax.set_ylabel('Thousand $')
    plot_ax.set_xlabel('Time')
    plot_ax.xaxis.set_major_locator(major)  # set major locator tick on x-axis
    plot_ax.xaxis.set_major_formatter(xfmt)  # format xtick label
    plt.grid(True)
    plt.show()
But a key point is you need to have your dates as Python's built-in datetime.date (not datetime.datetime); thanks to this answer. If your dates are str or a different type of datetime, you will need to convert, but there are many resources on SO and elsewhere for doing this like this or this:
In[0]:
# Month-start timestamps from Jan 2012 through Jan 2014 (25 values).
dr = pd.date_range('01-01-2012', '01-01-2014', freq='1MS')
# Explicitly convert each timestamp to datetime.date; iterate over dr itself,
# because df does not exist yet at this point.
dr = [pd.to_datetime(date).date() for date in dr]
df = pd.DataFrame(index=dr, data={'Profit':np.random.rand(25)})
type(df.index[0])
Out[0]:
datetime.date
Calling graph(df) using the example above gets this plot:
Just to expand on this, here's what happens when the index is pandas.Timestamp instead of datetime.date:
In[0]:
# Month-start timestamps from Jan 2012 through Jan 2014, kept as pandas timestamps.
dr = pd.date_range('01-01-2012', '01-01-2014', freq='1MS')
# dr = [pd.to_datetime(date).date() for date in dr] #skipping date conversion
df = pd.DataFrame(index=dr, data={'Profit':np.random.rand(25)})
graph(df)
Out[0]:
The x-axis is improperly formatted:
However, if you are willing to just create the plot directly through matplotlib, rather than pandas (pandas is using matplotlib anyway), this can handle more types of dates:
In[0]:
dr = pd.date_range('01-01-2012', '01-01-2014', freq='1MS')
# dr = [pd.to_datetime(date).date() for date in df.index] #skipping date conversion
df = pd.DataFrame(index=dr, data={'Profit':np.random.rand(25)})
def graph_2(dataframe):
    """Plot the "Profit" column against the index directly with matplotlib.

    Major x ticks are placed on January and July and labelled like 2012M01.
    """
    fig, ax = plt.subplots()
    xfmt = mdates.DateFormatter('%YM%m')
    major = mdates.MonthLocator([1,7])
    ax.plot(dataframe.index, dataframe['Profit'], label='Profit')
    ax.set_title('Statistics')
    ax.set_ylabel('Thousand $')
    ax.set_xlabel('Time')
    ax.xaxis.set_major_locator(major)
    ax.xaxis.set_major_formatter(xfmt)
    ax.legend()  # legend needs to be added
    plt.grid(True)
    plt.show()
graph_2(df)
type(df.index[0])
Out[0]:
pandas._libs.tslibs.timestamps.Timestamp
And here is the working graph:

How do I index or plot datetimes after resampling so they display on a bar plot axis correctly?

I want to display my third plot x-axis data in the datetime like my other two plots (see linked figure). I have used similar approaches to each graph, but resampled the third dataset to plot precipitation in a bar graph for every hour in my time period. When I originally attempted to format the date for the third plot as I did in the previous two, the x-axis labels either disappeared or the data doesn't plot correctly. In the link below, the data is displayed the way I intended.
Three subplots of rainfall
My timeseries data appears like this, where I'm only concerned about 'Reading' and 'Value':
Reading,Receive,Value,Unit,Quality
2018-04-07 13:09:28,2018-04-07 13:09:35,0.00,in,A
2018-04-07 06:01:25,2018-04-07 06:01:35,0.04,in,A
2018-04-07 04:38:15,2018-04-07 04:38:35,0.04,in,A
Here is how I achieved the correct scheme in the second plot:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as patches
import matplotlib.dates as mdates
import datetime as dt
#read data from csv
data2 = pd.read_csv('Arroyo_Corte_Madera_del_Presidio_38021_Precipitation_Accumulation_0.txt', usecols=['Reading','Value'], parse_dates=['Reading'])
#set date as index
data2.set_index('Reading',inplace=True)
#plot data
# Second of three stacked subplots.
ax2 = plt.subplot(3, 1, 2)
data2.plot(ax=ax2)
#set ticks every 12 hours
ax2.xaxis.set_major_locator(mdates.HourLocator(byhour=range(0,24,12)))
plt.xticks(rotation=0, ha='center')
#format date
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b %d\n%H:%M:%S'))
ax2.legend().set_visible(False)
ax2.set_title('Accumulated Rainfall\nApril 5-7, 2018')
ax2.set_xlabel('')
ax2.set_ylabel('Inches Since Oct 1 2017')
ax2.set_ylim(17.5, 22)
# Date strings converted to matplotlib date numbers for the highlight and annotation.
arrow_date2 = mdates.datestr2num('04/07/2018 04:30:00')
start_date2 = mdates.datestr2num('04/07/2018 03:00:00')
end_date2 = mdates.datestr2num('04/07/2018 06:00:00')
text_date2 = mdates.datestr2num('04/07/2018 03:00:00')
# Shade the 03:00-06:00 window; 0.86 and 0.97 are passed as the vertical extent.
ax2.axvspan(start_date2, end_date2, 0.86, 0.97, color='green', alpha=0.35)
# Arrow from the text anchor (text_date2, 19) to the point (arrow_date2, 21.5).
ax2.annotate("Approximate time of\nSlope Failure", xy=(arrow_date2, 21.5), xycoords='data', xytext=(text_date2, 19), textcoords='data', arrowprops=dict(arrowstyle="->", connectionstyle="arc3"))
My code so far for the third subplot:
#read data from csv
data = pd.read_csv('Arroyo_Corte_Madera_del_Presidio_38021_Precipitation_Increment_0.txt', usecols=['Reading','Value'], parse_dates=['Reading'])
#set date as index
data.set_index('Reading',inplace=True)
# Hourly totals; reset_index() turns 'Reading' back into an ordinary column.
resamp = data.resample('1H').sum().reset_index()
#plot data
ax3 = plt.subplot(3, 1, 3)
resamp.plot(kind='bar',ax=ax3, x='Reading', y='Value', width=0.9)
#set ticks every other hour
plt.xticks(ha='center')
# Hide every second tick label, starting with the first.
for label in ax3.xaxis.get_ticklabels()[::2]:
    label.set_visible(False)
ax3.legend().set_visible(False)
ax3.set_title('Rainfall in Hours\nApril 6-7, 2018')
ax3.set_xlabel('')
ax3.set_ylabel('Precipitation Increment (in)')
plt.show()
How do I fix my code to make the axis labels plot in the way I want them to plot?
My code was wrong, obviously. When I resampled the data, I reset the index. This created a new index column that was messing with my desired x values ('Reading'). Additionally, I shouldn't have been plotting 'x' in resamp.plot. This solution helped: Plotting with Pandas. Here is the corrected code:
#read data from csv
data = pd.read_csv('Arroyo_Corte_Madera_del_Presidio_38021_Precipitation_Increment_0.txt', usecols=['Reading','Value'], parse_dates=['Reading'])
#set date as index
data.set_index('Reading',inplace=True)
# Hourly totals, keeping the datetime index (no reset_index).
resamp = data.resample('1H').sum() # changed here
#plot data
ax3 = plt.subplot(3, 1, 3)
resamp.plot(ax=ax3, y='Value', kind='bar', width=0.9) # changed here
# Label each bar from the resampled index. The loop variable is `ts`, not `dt`,
# so it does not shadow the `datetime as dt` import used elsewhere.
ax3.set_xticklabels([ts.strftime('%b %d\n%H:%M:%S') for ts in resamp.index])
plt.xticks(rotation=0, ha='center')
# Keep every fourth tick (one per 4 hours) and hide the rest.
for i, tick in enumerate(ax3.xaxis.get_major_ticks()):
    if i % 4 != 0:
        tick.set_visible(False)
ax3.legend().set_visible(False)
ax3.set_title('Rainfall in Hours\nApril 6-7, 2018')
ax3.set_xlabel('')
ax3.set_ylabel('Precipitation Increment (in)')
ax3.set_ylim(0.00, 0.40)
plt.show()

Plot rectangles over datetime axis in matplotlib?

I am trying to manually create a candlestick chart with matplotlib using errorbar for the daily High and Low prices and Rectangle() for the Adjusted Close and Open prices. This question seemed to have all the prerequisites for accomplishing this.
I attempted to use the above very faithfully, but the issue of plotting something over an x-axis of datetime64[ns]'s gave me no end of errors, so I've additionally tried to incorporate the advice here on plotting over datetime.
This is my code so far, with apologies for the messiness:
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
def makeCandles(xdata,high,low,adj_close,adj_open,fc='r',ec='None',alpha=0.5):
    """Draw a hand-built candlestick-style chart and show it.

    Args:
        xdata: dates for the x axis; converted with mdates.date2num.
        high, low: daily high and low prices.
        adj_close, adj_open: adjusted close and open prices for the bodies.
        fc: face colour of the body rectangles.
        ec: edge colour of the body rectangles.
        alpha: opacity of the body rectangles.
    """
    import numpy as np  # used for vstack below; not among this snippet's imports

    ## Converting datetimes to numerical format matplotlib can understand.
    dates = mdates.date2num(xdata)
    ## Creating default objects
    fig,ax = plt.subplots(1)
    ## Creating errorbar peaks based on high and low prices
    avg = (high + low) / 2
    err = [high - avg,low - avg]
    # NOTE(review): err is passed in the y position here, not as yerr -- confirm
    # whether that is intended; the surrounding question reports an empty plot.
    ax.errorbar(dates,err,fmt='None',ecolor='k')
    ## Loop over data points; create "body" of candlestick
    ## based on adjusted open and close prices
    errors = np.vstack((adj_close,adj_open)).T
    ## Each rectangle is 1 unit wide on the date-number axis
    errorboxes = [
        Rectangle((xc,yc-ye[0]),1,ye.sum())
        for xc,yc,ye in zip(dates,avg,errors)
    ]
    ## Create patch collection with specified colour/alpha
    pc = PatchCollection(errorboxes,facecolor=fc,alpha=alpha,edgecolor=ec)
    ## Add collection to axes
    ax.add_collection(pc)
    plt.show()
With my data looking like
This is what I try to run, first getting a price table from quandl,
import quandl as qd
# NOTE(review): the API key is hard-coded in the snippet; consider loading it
# from configuration instead of source.
api = '1uRGReHyAEgwYbzkPyG3'
qd.ApiConfig.api_key = api
# Fetch high/low and adjusted open/close for AMZN and XOM, 2014-01-01 to 2016-12-31.
data = qd.get_table('WIKI/PRICES', qopts = { 'columns': ['ticker', 'date', 'high','low','adj_open','adj_close'] }, \
ticker = ['AMZN', 'XOM'], date = { 'gte': '2014-01-01', 'lte': '2016-12-31' })
data.reset_index(inplace=True,drop=True)
# NOTE(review): adj_open is passed before adj_close here, while makeCandles
# declares (adj_close, adj_open) -- confirm the intended order.
makeCandles(data['date'],data['high'],data['low'],data['adj_open'],data['adj_close'])
The code runs with no errors, but outputs an empty graph. So what I am asking for is advice on how to plot these rectangles over the datetime dates. For the width of the rectangles, I simply put a uniform "1" bec. I am not aware of a simple way to specify the datetime width of a rectangle.
Edit
This is the plot I am currently getting, having transformed my xdata into matplotlib mdates:
Before I transformed xdata via mdates, with just xdata as my x-axis everywhere, this was one of the errors I kept getting:
To get the plot you want, there are a couple of things that need to be considered. First, you're retrieving two stocks, AMZN and XOM; displaying both will make the chart you want look funny, because the data are quite far apart. Second, candlestick charts in which you plot each day for several years will get very crowded. Finally, you need to format your ordinal dates back on the x-axis.
As mentioned in the comments, you can use the pre-built matplotlib candlestick2_ohlc function (although deprecated) accessible through mpl_finance, install as shown in this answer. I opted for using solely the matplotlib barchart with built-in errorbars.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import quandl as qd
from matplotlib.dates import DateFormatter, WeekdayLocator, \
DayLocator, MONDAY
# get data
api = '1uRGReHyAEgwYbzkPyG3'
qd.ApiConfig.api_key = api
data = qd.get_table('WIKI/PRICES', qopts={'columns': ['ticker', 'date', 'high', 'low', 'open', 'close']},
                    ticker=['AMZN', 'XOM'], date={'gte': '2014-01-01', 'lte': '2014-03-10'})
data.reset_index(inplace=True, drop=True)

fig, ax = plt.subplots(figsize = (10, 5))
data['date'] = mdates.date2num(data['date'].dt.to_pydatetime())  # convert dates to matplotlib date numbers

# Plot a single ticker. The original looped over every ticker but always selected
# AMZN, so the same bars were drawn repeatedly and the title could name the
# wrong stock. Both tickers on one axis would look odd anyway, as their price
# ranges are far apart.
stock = 'AMZN'
df = data[data['ticker'] == stock]
inc = df.close > df.open  # days that closed higher than they opened
dec = df.open > df.close  # days that closed lower than they opened

# Each bar runs from `bottom` to bottom + height, and the error bar is anchored
# at that far end. yerr is given as non-negative [distance down to low,
# distance up to high], so the whisker spans low..high.
ax.bar(df['date'][inc],
       df['open'][inc]-df['close'][inc],
       color='palegreen',
       bottom=df['close'][inc],
       yerr=[df['open'][inc]-df['low'][inc], df['high'][inc]-df['open'][inc]],
       error_kw=dict(ecolor='gray', lw=1))
ax.bar(df['date'][dec],
       df['close'][dec]-df['open'][dec],
       color='salmon', bottom=df['open'][dec],
       yerr=[df['close'][dec]-df['low'][dec], df['high'][dec]-df['close'][dec]],
       error_kw=dict(ecolor='gray', lw=1))
ax.set_title(stock)

#some tweaking, setting the dates
mondays = WeekdayLocator(MONDAY)  # major ticks on the mondays
alldays = DayLocator()  # minor ticks on the days
weekFormatter = DateFormatter('%b %d')  # e.g., Jan 12
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_minor_locator(alldays)
ax.xaxis.set_major_formatter(weekFormatter)
ax.set_ylabel('monies ($)')
plt.show()

Categories

Resources