import yfinance as yf
import pandas as pd
dataF = yf.download("EURUSD=X", start="2022-12-22", end="2022-12-24", interval='60m')
print(dataF.iloc[:])
def signal_generator(df):
    open = df.Open.iloc[-1]
    close = df.Close.iloc[1]
    one_open = df.Open.iloc[-2]
    one_close = df.Close.iloc[-2]
    # Bearish Pattern
    if (open <= close and
            one_open > one_close):
        return 1
    # Bullish Pattern
    elif (open >= close and
            one_open < one_close):
        return 2
    # No clear pattern
    else:
        return 0
signal = []
signal.append(0)
for i in range(1, len(dataF)):
    df = dataF[i-1:i+1]
    signal.append(signal_generator(df))
#signal_generator(data)
dataF["signal"] = signal
print(dataF.signal.value_counts())
The first example seems to work, grabbing 2 candlesticks, but when I grab 4, as in the next code example, it shows me an error:
import yfinance as yf
import pandas as pd
dataF = yf.download("EURUSD=X", start="2022-12-22", end="2022-12-24", interval='60m')
print(dataF.iloc[:])
def signal_generator(df):
    open = df.Open.iloc[-1]
    close = df.Close.iloc[1]
    one_open = df.Open.iloc[-2]
    one_close = df.Close.iloc[-2]
    two_open = df.Close.iloc[-3]
    two_close = df.Close.iloc[-3]
    three_open = df.Close.iloc[-3]
    three_close = df.Close.iloc[-3]
    # Bearish Pattern
    if (open <= close and
            one_open > one_close and
            two_open < two_close and
            three_open >= three_close):
        return 1
    # Bullish Pattern
    elif (open >= close and
            one_open < one_close and
            two_open > two_close and
            three_open <= three_close):
        return 2
    # No clear pattern
    else:
        return 0
signal = []
signal.append(0)
for i in range(1, len(dataF)):
    df = dataF[i-1:i+1]
    signal.append(signal_generator(df))
#signal_generator(data)
dataF["signal"] = signal
print(dataF.signal.value_counts())
I believe the problem is in this line:
df = dataF[i-1:i+1]
That slice contains only two rows, while the four-candle version of signal_generator indexes back to iloc[-3], so it raises an IndexError. Try this and report back.
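A minimal sketch of the fix, assuming a four-candle window is what you want. (Note also that two_open/two_close and three_open/three_close in your function all read df.Close.iloc[-3]; reading the opens from df.Open and stepping back to iloc[-4] for the three_ pair look like the intended indices.)

signal = [0, 0, 0]  # no signal for the first three candles
for i in range(3, len(dataF)):
    df = dataF[i-3:i+1]  # slice of four candles: rows i-3 .. i
    signal.append(signal_generator(df))
dataF["signal"] = signal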
from utils import *
import time
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
import yfinance as yf  # needed for yf.download below
from pandas_datareader import data as wb
tickers = ['SBUX']
start = '2022-09-01'
end = '2022-12-13'
price_data = []
for ticker in tickers:
    data = yf.download(ticker, start, end)
    data = data.reset_index()
    prices = data.loc[:, ['Date', 'Adj Close']]
    price_data.append(prices.assign(ticker=ticker)[['ticker', 'Date', 'Adj Close']])
df = pd.concat(price_data)
df.dtypes
df.head()
df.shape
# Technical Indicators
data = df
num_training_days = int(data.shape[0]*.7)
print('Number of training days: {}. Number of test days: {}.'.format(num_training_days, data.shape[0]-num_training_days))
data['ma7'] = data['Adj Close'].rolling(window=7).mean()
data['ma21'] = data['Adj Close'].rolling(window=21).mean()
# Create exponential weighted moving average
data['26ema'] = data['Adj Close'].ewm(span=26).mean()
data['12ema'] = data['Adj Close'].ewm(span=12).mean()
data['MACD'] = (data['12ema']-data['26ema'])
# Create Bollinger Bands
data['20sd'] = data['Adj Close'].rolling(window=20).std()
data['upper_band'] = data['ma21'] + (data['20sd']*2)
data['lower_band'] = data['ma21'] - (data['20sd']*2)
# Create Exponential moving average
data['ema'] = data['Adj Close'].ewm(com=0.5).mean()
# Create Momentum (note: this subtracts 1 from the price; a rate of change such as pct_change() is probably what was intended)
data['momentum'] = data['Adj Close']-1
dataset_TI_df = data
dataset = data
#def plot_technical_indicators(dataset, last_days):
last_days = 250
plt.figure(figsize=(16, 10), dpi=100)
shape_0 = dataset.shape[0]
xmacd_ = shape_0-last_days
dataset = dataset.iloc[-last_days:, :]
x_ = range(3, dataset.shape[0])
x_ =list(dataset.index)
# Plot first subplot
plt.subplot(2, 1, 1)
plt.plot(dataset['ma7'],label='MA 7', color='g',linestyle='--')
plt.plot(dataset['Adj Close'],label='Closing Price', color='b')
plt.plot(dataset['ma21'],label='MA 21', color='r',linestyle='--')
plt.plot(dataset['upper_band'],label='Upper Band', color='c')
plt.plot(dataset['lower_band'],label='Lower Band', color='c')
plt.fill_between(x_, dataset['lower_band'], dataset['upper_band'], alpha=0.35)
plt.title('Technical indicators for Starbucks - last {} days.'.format(last_days))
plt.legend()
# Plot second subplot
plt.subplot(2, 1, 2)
plt.title('MACD')
plt.plot(dataset['MACD'],label='MACD', linestyle='-.')
plt.hlines(15, xmacd_, shape_0, colors='g', linestyles='--')
plt.hlines(-15, xmacd_, shape_0, colors='g', linestyles='--')
# plt.plot(dataset['log_momentum'],label='Momentum', color='b',linestyle='-')
plt.legend()
plt.show()
# Trade Signals
signalBuy = []
signalSell = []
position = False
for i in range(len(data)):
    if data['ma7'][i] > data['ma21'][i]:
        if position == False:
            signalBuy.append(data['Adj Close'][i])
            signalSell.append(np.nan)
            position = True
        else:
            signalBuy.append(np.nan)
            signalSell.append(np.nan)
    elif data['ma7'][i] < data['ma21'][i]:
        if position == True:
            signalBuy.append(np.nan)
            signalSell.append(data['Adj Close'][i])
            position = False
        else:
            signalBuy.append(np.nan)
            signalSell.append(np.nan)
    else:
        signalBuy.append(np.nan)
        signalSell.append(np.nan)
data['Buy_Signal_price'] = signalBuy
data['Sell_Signal_price'] = signalSell
data
# Plotting Buy and Sell Points
fig, ax = plt.subplots(figsize=(14,8))
ax.plot(data['Adj Close'] , label = 'stock' ,linewidth=0.5, color='blue', alpha = 0.9)
ax.plot(data['ma7'], label = 'ma7', alpha = 0.85)
ax.plot(data['ma21'], label = 'ma21' , alpha = 0.85)
ax.scatter(data.index , data['Buy_Signal_price'] , label = 'Buy' , marker = '^', color = 'green',alpha =1 )
ax.scatter(data.index , data['Sell_Signal_price'] , label = 'Sell' , marker = 'v', color = 'red',alpha =1 )
ax.set_title(" Price History with buy and sell signals",fontsize=10, backgroundcolor='blue', color='white')
ax.set_xlabel(f'{start} - {end}', fontsize=18)
ax.set_ylabel('Close Price USD ($)', fontsize=18)
legend = ax.legend()
ax.grid()
plt.tight_layout()
plt.show()
moving_average_window = 30
data = df
# readjusting the DataFrame
data = data[["Adj Close"]]
# creating the moving average
data["ma20"] = data["Adj Close"].rolling(window=moving_average_window).mean()  # note: named ma20 but the window is 30
#calculating daily returns
data["daily returns"] = np.log(data["Adj Close"] / data["Adj Close"].shift(1))
data["position"] = [0] * len(data)
data.reset_index(inplace=True)
data = data.drop(["index"], axis=1)
pos_exit = False
pos = "N"
std = round(data["daily returns"].std(),4)
mean = round(data["daily returns"].mean(),4)
print("Std on daily returns :", std)
print("Mean on daily returns :", mean,"\n")
print(data.head(7))
# Event Driven Testing
for i in range(1, len(data)):
    # Signal to go short and reset position
    if pos_exit:
        pos_exit = False
        pos = "N"
        continue
    # going long, if return goes beyond lower bound
    # (1 standard deviation). The assumption here is
    # that the stock will revert back to its mean value
    if data["Adj Close"][i] < ((1 - std) * data["ma20"][i]):
        data.at[i, "position"] = 1
        pos = "L"
    # scenario if return in between lower and upper bounds
    if pos == "L":
        data.at[i, "position"] = 1
    # updating strategy returns
    data["strategy returns"] = data["daily returns"] * data["position"]
    # exiting if the strategy return drops by 3%
    if data["strategy returns"][i] < -0.03:
        data.at[i, "position"] = 0
        pos_exit = True
data.tail(10)
# taking positions after one day of signals being generated
data["position"] = data["position"].shift(1)
print("Buy and hold returns =",round(list(data["daily returns"].cumsum())[-1],4)*100,"%")
print("Strategy returns =", round(list(data["strategy returns"].cumsum())[-1],4)*100,"%")
I'm retrieving live data to use for further processing in a DataFrame.
The first part (the get_binance_bars function) gets the historical data, to which a linear regression line is fitted.
Now I would like the linear regression line to be updated whenever the websocket receives new data. The changing live data is in df['live_price'].
How would you do this?
import websocket, json
import requests
import numpy as np
import pandas as pd
import datetime as dt
from datetime import datetime, date
from sklearn.linear_model import LinearRegression
symbol = "ETHUSDT"
tf = "1m"
now = datetime.now()
today = date.today()
d = int(today.strftime("%d"))
m = int(today.strftime("%m"))
y = int(today.strftime("%Y"))
hr = int(now.strftime("%H"))
mn = int(now.strftime("%M"))
def get_binance_bars(ticker, interval, startTime, endTime):
    url = "https://api.binance.com/api/v3/klines"
    startTime = str(int(startTime.timestamp() * 1000))
    endTime = str(int(endTime.timestamp() * 1000))
    limit = '1000'
    req_params = {"symbol": ticker, 'interval': interval, 'startTime': startTime, 'endTime': endTime, 'limit': limit}
    df = pd.DataFrame(json.loads(requests.get(url, params=req_params).text))
    if (len(df.index) == 0):
        return None
    df = df.iloc[:, 0:4]
    df.columns = ['time', 'high', 'low', 'close']
    df.close = df.close.astype("float")
    df.low = df.low.astype("float")
    df.high = df.high.astype("float")
    global Y_pred
    X = df.time.iloc[-20:].values.reshape(-1, 1)
    Y = df.close.iloc[-20:].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    Y_pred = linear_regressor.predict(X)
    df['Y_pred'] = np.nan
    df.iloc[-20:, df.columns.get_loc('Y_pred')] = Y_pred
    df.time = [dt.datetime.fromtimestamp(x / 1000.0) for x in df.time]
    df.drop(df.tail(1).index, inplace=True)  # cut last row to prevent double bar with live data
    return df
SOCKET = "wss://stream.binance.com:9443/ws/" + symbol.lower() + "@kline_" + tf
df = get_binance_bars(symbol, tf, dt.datetime(y, m, d, hr-hr, mn), dt.datetime(y, m, d, hr, mn)) #define how many bars, hr-1 = 60 bars
def on_open(ws):
    print('opened connection')

def on_close(ws):
    print('closed connection')

def on_message(ws, message):
    global df
    global time_plot
    global close
    global low
    global high
    json_message = json.loads(message)
    high = float(json_message['k']['h'])
    low = float(json_message['k']['l'])
    close = float(json_message['k']['c'])
    time_plot = dt.datetime.fromtimestamp(json_message['k']['t'] / 1000).strftime('%H:%M')
    df['live_price'] = close
    df.iloc[-20:, df.columns.get_loc('Y_pred')] = Y_pred  # <--- DOESN'T WORK
    print(df)
ws = websocket.WebSocketApp(SOCKET, on_open=on_open, on_close=on_close, on_message=on_message)
ws.run_forever()
I would prefer to use Plotly for this sort of work. Have a look at the Dash Interval component for updating graphs and plots; it is useful in the longer run and for building dashboards.
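A minimal sketch of that idea, assuming dash and plotly are installed and that ws.run_forever() is moved onto a background thread so both event loops can coexist. Note that in the code above Y_pred is computed only once, inside get_binance_bars, which is why reassigning it in on_message never changes anything; the dcc.Interval callback below refits the regression on every tick instead, using the bar index as the regressor for simplicity:

import threading
import numpy as np
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output
from sklearn.linear_model import LinearRegression

threading.Thread(target=ws.run_forever, daemon=True).start()  # keep the socket feeding df

app = Dash(__name__)
app.layout = html.Div([
    dcc.Graph(id="live-chart"),
    dcc.Interval(id="tick", interval=1000, n_intervals=0),  # fire every 1000 ms
])

@app.callback(Output("live-chart", "figure"), Input("tick", "n_intervals"))
def refresh(n):
    window = df.tail(20)  # refit on the latest 20 bars each tick
    X = np.arange(len(window)).reshape(-1, 1)
    reg = LinearRegression().fit(X, window.close.values.reshape(-1, 1))
    fig = go.Figure()
    fig.add_scatter(x=df.time, y=df.close, mode="lines", name="close")
    fig.add_scatter(x=window.time, y=reg.predict(X).ravel(), mode="lines", name="regression")
    return fig

app.run_server(debug=False)  # blocking call, replaces ws.run_forever() at the end of the script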
This is my code:
import pandas as pd
import numpy as np
from bokeh.models import *
from bokeh.plotting import *
from bokeh.io import *
from bokeh.tile_providers import *
from bokeh.palettes import *
from bokeh.transform import *
from bokeh.layouts import *
radius_scale = 100
df = pd.DataFrame({'date': ['2009-01-01', '2009-01-02', '2009-01-03', '2009-01-04', '2009-01-05', '2009-01-01', '2009-01-02', '2009-01-03', '2009-01-04', '2009-01-05','2009-01-01', '2009-01-02', '2009-01-03', '2009-01-04', '2009-01-05'],
'state': ['Melaka', 'Melaka', 'Melaka', 'Melaka', 'Melaka', 'Perak', 'Perak', 'Perak', 'Perak', 'Perak', 'Kuala Lumpur', 'Kuala Lumpur', 'Kuala Lumpur', 'Kuala Lumpur', 'Kuala Lumpur'],
'tourists': [100, 121, 235, 197, 390, 57, 49, 81, 73, 183, 351, 490, 618, 438, 557]})
df['longitude'] = df['state'].map({'Melaka': 102.2464615, 'Perak': 101.0314453, 'Kuala Lumpur': 101.6869,})
df['latitude'] = df['state'].map({'Melaka': 2.206414407, 'Perak': 4.01185976, 'Kuala Lumpur': 3.1390,})
df['date'] = df['date'].astype('datetime64[ns]')
df['tourists_plot'] = df['tourists'] * radius_scale
# Mercator units conversion
def wgs84_to_web_mercator(df, lon, lat):
    """Converts decimal longitude/latitude to Web Mercator format"""
    k = 6378137
    df["x"] = df[lon] * (k * np.pi/180.0)
    df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k
    return df
df = wgs84_to_web_mercator(df, 'longitude', 'latitude')
source = ColumnDataSource(df)
# Map zoom scale
scale = 2500
x = df['x']
y = df['y']
# Centers map
x_min = int(x.mean() - (scale * 1))
x_max = int(x.mean() + (scale * 1))
y_min = int(y.mean() - (scale * 350))
y_max = int(y.mean() + (scale * 350))
# Prepare Bokeh
plot = figure(
title = 'Malaysia Tourism',
match_aspect = True,
tools = 'wheel_zoom, pan, reset, save',
x_range = (x_min, x_max), y_range = (y_min, y_max),
x_axis_type = 'mercator', y_axis_type = 'mercator',
width = 900
)
plot.grid.visible = True
# Get map
map = plot.add_tile(get_provider(OSM))
map.level = 'underlay'
plot.xaxis.visible = False
plot.yaxis.visible = False
plot.title.text_font_size = "20px"
def bubble_map(plot, df_source, radius_col_plot, radius_col, state, color='orange', leg_label='Bubble Map'):
    source = df_source
    c = plot.circle(x='x', y='y', color=color, source=source, size=1, fill_alpha=0.4, radius=radius_col_plot,
                    legend_label=leg_label, hover_color='red')
    tip_label = '@' + radius_col  # Bokeh tooltip fields use @, e.g. '@tourists'
    state_label = '@' + state
    circle_hover = HoverTool(tooltips=[("Percentage " + leg_label, tip_label + "%"), ('State', state_label)], mode='mouse',
                             point_policy='follow_mouse', renderers=[c])
    circle_hover.renderers.append(c)
    plot.tools.append(circle_hover)
    plot.legend.location = "top_right"
    plot.legend.click_policy = "hide"
bubble_map(plot=plot,
           df_source=source, radius_col_plot='tourists_plot', radius_col='tourists', leg_label='Tourists',
           state='state', color='blue')
date_slider = DateSlider(start = min(df.date), end = max(df.date), value = min(df.date), step = 1, title = "Date")
def update_plot(attr, old, new):
    datesel = new
    new_data = df[df['date'] == datesel]
    source.data.update(ColumnDataSource(new_data).data)
date_slider.on_change('value', update_plot)
# show(column(date_slider, plot))
curdoc().add_root(column(date_slider, plot))
In short, what I'm trying to plot is the growth of df['tourists'] for each state on the map, day by day. I'm trying to enable data filtering via the DateSlider widget date_slider, which filters rows based on the selected date.
However, the slider isn't working for me. I'm not getting any errors either, so I'm at a loss as to what to debug.
Before any selection, all 5 days of data are cluttered together; and the slider doesn't help either, since all points disappear upon sliding.
Please advise.
DateSlider returns a timestamp in milliseconds, which you are using to filter the new dataframe. As the new date is not in the form of YYYY-MM-DD, your dataframe comes out empty.
Convert the timestamp received from DateSlider and it should work:
from datetime import datetime

def update_plot(attr, old, new):
    datesel = datetime.fromtimestamp(new / 1000).strftime('%Y-%m-%d')
    new_data = df[df['date'] == datesel]
    source.data.update(ColumnDataSource(new_data).data)
Since the timestamp is in milliseconds and datetime expects seconds, it has to be divided by 1000.
Another suggestion is to change step from 1 to 86400000 (the number of milliseconds in a day) to make it easier to slide between dates:
date_slider = DateSlider(start = min(df.date), end = max(df.date), value = min(df.date), step = 86400000, title = "Date")
I am trying to plot a histogram but can't seem to get it working.
My current code uses a line plot.
The code is below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
from pandas_datareader import data as pdr

ticker = 'BGSF'
style.use('ggplot')
start_date = '01-01-2010'
end_date = '03-07-2021'
prices = pdr.DataReader(ticker, data_source='yahoo', start=start_date, end=end_date)['Close']
returns = prices.pct_change()
last_price = prices[-1]
number_of_simulations = 10000
num_days = 90
simulation_df = pd.DataFrame()
for x in range(number_of_simulations):
    counter = 0
    daily_vol = returns.std()
    price_series = []
    price = last_price * (1 + np.random.normal(0, daily_vol))
    price_series.append(price)
    for y in range(num_days):
        if counter == 251:
            break
        price = price_series[counter] * (1 + np.random.normal(0, daily_vol))
        price_series.append(price)
        counter += 1
    simulation_df[x] = price_series
fig = plt.figure()
fig.suptitle('Simulator')
plt.plot(simulation_df)
plt.axhline(y = last_price, color = 'r', linestyle = '-')
plt.xlabel('Day')
plt.ylabel('Price')
plt.show()
How can I change my code so that I get a histogram/distribution? Or something that will give a discernible visual representation of the data.
The data output looks like:
0 1 2 ... 9997 9998 9999
0 13.628622 13.239073 12.377603 ... 11.604061 13.289695 12.351764
1 13.286069 13.229105 11.802037 ... 10.922634 13.369048 11.687561
2 13.278381 12.754887 11.293223 ... 10.722178 14.019657 11.468026
3 13.518970 13.051310 11.697287 ... 10.973414 14.125174 11.326635
4 12.696852 13.037619 11.126289 ... 10.840734 14.397820 11.226199
You can do the following:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#generate some data
col0 = np.random.randn(1000)
col1 = np.random.randn(1000)+10
df_simulated = pd.DataFrame({"0":col0,"1":col1})
last_price = 300
fig, ax = plt.subplots(1, 1)
fig.suptitle("Simulator")
ax.hist(df_simulated)  # plot one histogram per column on "ax"
ax.axvline(x=last_price, color='r', linestyle='-')  # price is on the x-axis of a histogram
ax.set_xlabel('Price')
ax.set_ylabel('Frequency')
plt.show()
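Applied to the original code, a common choice is to histogram the final simulated day, i.e. the distribution of ending prices across all 10,000 paths (a sketch, assuming the simulation_df and last_price defined in the question):

plt.hist(simulation_df.iloc[-1], bins=100)  # ending price of each simulated path
plt.axvline(x=last_price, color='r', linestyle='-')  # current price for reference
plt.xlabel('Simulated price after 90 days')
plt.ylabel('Frequency')
plt.show()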
My timing shows that k-means consistently loses out on timing, compared to a mixture model initialized using k-means.
What's the explanation for this? Is the GMM using a different k-means algorithm? Am I misunderstanding how it works? Does it use a differently sized dataset (smaller than the one I'm drawing from)?
import sklearn.cluster
import sklearn.mixture
import numpy as np
import time
import matplotlib.pyplot as plt
k = 3
N = 100
def clust():
    m = sklearn.cluster.KMeans(n_clusters=k)
    m.fit(X.reshape(-1, 1))
    return m.cluster_centers_

def fit():
    m = sklearn.mixture.GaussianMixture(n_components=k, init_params="kmeans")
    m.fit(X.reshape(-1, 1))
    return m.means_
duration_clust = []
duration_fit = []
ctrs_clust = []
ctrs_fit = []
for i in range(N):
    _1 = np.random.normal(0.25, 0.15, 50)
    _2 = np.random.normal(0.50, 0.15, 50)
    _3 = np.random.normal(0.75, 0.15, 50)
    X = np.concatenate((_1, _2, _3)).reshape(-1, 1)
    ts = time.time()
    c = clust()
    te = time.time()
    time_clust = (te - ts) * 1e3
    ts = time.time()
    f = fit()
    te = time.time()
    time_fit = (te - ts) * 1e3
    duration_clust.append(time_clust)
    duration_fit.append(time_fit)
    ctrs_clust.append(c)
    ctrs_fit.append(f)
bins0 = np.arange(0, 20, 1)
bins1 = np.linspace(0,1,30)
fig, ax = plt.subplots(nrows = 2)
ax[0].hist(duration_clust, label = "Kmeans", bins = bins0, alpha = 0.5)
ax[0].hist(duration_fit, label = "GMM with Kmeans", bins = bins0, alpha = 0.5)
ax[0].set_xlabel("duration (ms)")
ax[0].legend(loc = "upper right")
ax[1].hist(np.ravel(ctrs_clust), label = "Kmeans centers", bins = bins1, alpha = 0.5)
ax[1].hist(np.ravel(ctrs_fit), label = "GMM centers", bins = bins1, alpha = 0.5)
ax[1].set_xlabel("Center location")
ax[1].axvline([0.25], label = "Truth", color = "black")
ax[1].axvline([0.50], color = "black")
ax[1].axvline([0.75], color = "black")
ax[1].legend(loc = "upper right")
plt.tight_layout()
plt.show()
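One thing worth checking (an assumption about scikit-learn's defaults, not a measured answer): sklearn.cluster.KMeans restarts the whole algorithm n_init times (historically 10 by default) and keeps the best run, whereas GaussianMixture's k-means initialization performs a single k-means run internally. Timing a single-restart KMeans makes the comparison fairer:

def clust_single():
    # hypothetical variant of clust() with one restart instead of the default
    m = sklearn.cluster.KMeans(n_clusters=k, n_init=1)
    m.fit(X.reshape(-1, 1))
    return m.cluster_centers_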
Can anybody help me optimize this plot function in Python? I use Matplotlib to plot financial data. Here is a small function for plotting OHLC data; the time increases significantly if I add indicators or other data.
import numpy as np
import datetime
from matplotlib.collections import LineCollection
from pylab import *
import urllib2
def test_plot(OHLCV):
    bar_width = 1.3
    date_offset = 0.5
    fig = figure(figsize=(50, 20), facecolor='w')
    ax = fig.add_subplot(1, 1, 1)
    labels = ax.get_xmajorticklabels()
    setp(labels, rotation=0)
    month = MonthLocator()
    day = DayLocator()
    timeFmt = DateFormatter('%Y-%m-%d')
    colormap = OHLCV[:,1] < OHLCV[:,4]
    color = np.zeros(colormap.__len__(), dtype=np.dtype('|S5'))
    color[:] = 'red'
    color[np.where(colormap)] = 'green'
    dates = date2num(OHLCV[:,0])
    lines_hl = LineCollection(zip(zip(dates, OHLCV[:,2]), zip(dates, OHLCV[:,3])))
    lines_hl.set_color(color)
    lines_hl.set_linewidth(bar_width)
    lines_op = LineCollection(zip(zip((np.array(dates) - date_offset).tolist(), OHLCV[:,1]), zip((np.array(dates)).tolist(), OHLCV[:,1])))
    lines_op.set_color(color)
    lines_op.set_linewidth(bar_width)
    lines_cl = LineCollection(zip(zip((np.array(dates) + date_offset).tolist(), OHLCV[:,4]), zip((np.array(dates)).tolist(), OHLCV[:,4])))
    lines_cl.set_color(color)
    lines_cl.set_linewidth(bar_width)
    ax.add_collection(lines_hl, autolim=True)
    ax.add_collection(lines_cl, autolim=True)
    ax.add_collection(lines_op, autolim=True)
    ax.xaxis.set_major_locator(month)
    ax.xaxis.set_major_formatter(timeFmt)
    ax.xaxis.set_minor_locator(day)
    ax.autoscale_view()
    ax.xaxis.grid(True, 'major')
    ax.grid(True)
    ax.set_title('EOD test plot')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price , $')
    fig.savefig('test.png', dpi=50, bbox_inches='tight')
    close()
if __name__ == '__main__':
    data_table = urllib2.urlopen(r"http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2012&d=00&e=15&f=2013&g=d&ignore=.csv").readlines()[1:][::-1]
    parsed_table = []
    # Format: Date, Open, High, Low, Close, Volume
    dtype = (lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date(), float, float, float, float, int)
    for row in data_table:
        field = row.strip().split(',')[:-1]
        data_tmp = [i(j) for i, j in zip(dtype, field)]
        parsed_table.append(data_tmp)
    parsed_table = np.array(parsed_table)
    import time
    bf = time.time()
    count = 100
    for i in xrange(count):
        test_plot(parsed_table)
    print('Plot time: %s' % ((time.time() - bf) / count))
The result is something like this. Average execution time per plot is approximately 2.6s. Charting in R is much faster, but I didn't measure its performance, and I don't want to use Rpy, so I believe my code is inefficient.
This solution reuses a Figure instance and saves plots asynchronously. You could change this to have as many figures as there are processors, do that many plots asynchronously, and it should speed things up even more. As it is, this takes ~1s per plot, down from 2.6s on my machine.
import numpy as np
import datetime
import urllib2
import time
import multiprocessing as mp
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from pylab import *
from matplotlib.collections import LineCollection
class AsyncPlotter():
    def __init__(self, processes=mp.cpu_count()):
        self.manager = mp.Manager()
        self.nc = self.manager.Value('i', 0)
        self.pids = []
        self.processes = processes

    def async_plotter(self, nc, fig, filename, processes):
        while nc.value >= processes:
            time.sleep(0.1)
        nc.value += 1
        print "Plotting " + filename
        fig.savefig(filename)
        plt.close(fig)
        nc.value -= 1

    def save(self, fig, filename):
        p = mp.Process(target=self.async_plotter,
                       args=(self.nc, fig, filename, self.processes))
        p.start()
        self.pids.append(p)

    def join(self):
        for p in self.pids:
            p.join()

class FinanceChart():
    def __init__(self, async_plotter):
        self.async_plotter = async_plotter
        self.bar_width = 1.3
        self.date_offset = 0.5
        self.fig = plt.figure(figsize=(50, 20), facecolor='w')
        self.ax = self.fig.add_subplot(1, 1, 1)
        self.labels = self.ax.get_xmajorticklabels()
        setp(self.labels, rotation=0)
        line_hl = LineCollection(([[(734881,1), (734882,5), (734883,9), (734889,5)]]))
        line_op = LineCollection(([[(734881,1), (734882,5), (734883,9), (734889,5)]]))
        line_cl = LineCollection(([[(734881,1), (734882,5), (734883,9), (734889,5)]]))
        self.lines_hl = self.ax.add_collection(line_hl, autolim=True)
        self.lines_op = self.ax.add_collection(line_op, autolim=True)
        self.lines_cl = self.ax.add_collection(line_cl, autolim=True)
        self.ax.set_title('EOD test plot')
        self.ax.set_xlabel('Date')
        self.ax.set_ylabel('Price , $')
        month = MonthLocator()
        day = DayLocator()
        timeFmt = DateFormatter('%Y-%m-%d')
        self.ax.xaxis.set_major_locator(month)
        self.ax.xaxis.set_major_formatter(timeFmt)
        self.ax.xaxis.set_minor_locator(day)

    def test_plot(self, OHLCV, i):
        colormap = OHLCV[:,1] < OHLCV[:,4]
        color = np.zeros(colormap.__len__(), dtype=np.dtype('|S5'))
        color[:] = 'red'
        color[np.where(colormap)] = 'green'
        dates = date2num(OHLCV[:,0])
        date_array = np.array(dates)
        xmin = min(dates)
        xmax = max(dates)
        ymin = min(OHLCV[:,1])
        ymax = max(OHLCV[:,1])
        self.lines_hl.set_segments(zip(zip(dates, OHLCV[:,2]), zip(dates, OHLCV[:,3])))
        self.lines_hl.set_color(color)
        self.lines_hl.set_linewidth(self.bar_width)
        self.lines_op.set_segments(zip(zip((date_array - self.date_offset).tolist(), OHLCV[:,1]), zip(date_array.tolist(), OHLCV[:,1])))
        self.lines_op.set_color(color)
        self.lines_op.set_linewidth(self.bar_width)
        self.lines_cl.set_segments(zip(zip((date_array + self.date_offset).tolist(), OHLCV[:,4]), zip(date_array.tolist(), OHLCV[:,4])))
        self.lines_cl.set_color(color)
        self.lines_cl.set_linewidth(self.bar_width)
        self.ax.set_xlim(xmin, xmax)
        self.ax.set_ylim(ymin, ymax)
        self.ax.xaxis.grid(True, 'major')
        self.ax.grid(True)
        self.async_plotter.save(self.fig, '%04i.png' % i)
if __name__ == '__main__':
    print "Starting"
    data_table = urllib2.urlopen(r"http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2012&d=00&e=15&f=2013&g=d&ignore=.csv").readlines()[1:][::-1]
    parsed_table = []
    # Format: Date, Open, High, Low, Close, Volume
    dtype = (lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date(), float, float, float, float, int)
    for row in data_table:
        field = row.strip().split(',')[:-1]
        data_tmp = [i(j) for i, j in zip(dtype, field)]
        parsed_table.append(data_tmp)
    parsed_table = np.array(parsed_table)
    import time
    bf = time.time()
    count = 10
    a = AsyncPlotter()
    _chart = FinanceChart(a)
    print "Done with startup tasks"
    for i in xrange(count):
        _chart.test_plot(parsed_table, i)
    a.join()
    print('Plot time: %.2f' % (float(time.time() - bf) / float(count)))