How can I plot a histogram using pandas?

How can I plot a histogram using pandas? - python

I am trying to plot a histogram but can't seem to get it working.
My current code is using a line plot.
The code is below:
ticker = 'BGSF'
style.use('ggplot')
start_date = '01-01-2010'
end_date = '03-07-2021'
prices = pdr.DataReader(ticker, data_source='yahoo', start=start, end=end)['Close']
returns = prices.pct_change()
last_price = prices[-1]
number_of_simulations = 10000
num_days = 90
simulation_df = pd.DataFrame()
for x in range(number_of_simulations):
counter = 0
daily_vol = returns.std()
price_series = []
price = last_price * (1 + np.random.normal(0, daily_vol))
price_series.append(price)
for y in range(num_days):
if counter == 251:
break
price = price_series[counter] * (1 + np.random.normal(0, daily_vol))
price_series.append(price)
counter += 1
simulation_df[x] = price_series
fig = plt.figure()
fig.suptitle('Simulator')
plt.plot(simulation_df)
plt.axhline(y = last_price, color = 'r', linestyle = '-')
plt.xlabel('Day')
plt.ylabel('Price')
plt.show()
How can I change my code so that I get a histogram/distribution? Or something that will give a discernible visual representation of the data.
The data output looks like:
0 1 2 ... 9997 9998 9999
0 13.628622 13.239073 12.377603 ... 11.604061 13.289695 12.351764
1 13.286069 13.229105 11.802037 ... 10.922634 13.369048 11.687561
2 13.278381 12.754887 11.293223 ... 10.722178 14.019657 11.468026
3 13.518970 13.051310 11.697287 ... 10.973414 14.125174 11.326635
4 12.696852 13.037619 11.126289 ... 10.840734 14.397820 11.226199

You can do the following
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#generate some data
col0 = np.random.randn(1000)
col1 = np.random.randn(1000)+10
df_simulated = pd.DataFrame({"0":col0,"1":col1})
last_price = 300
fig,ax = plt.subplots(1,1)
fig.suptitle("Simulator")
plt.hist(df_simulated,axes=ax) #plot histogram on "ax"
plt.axhline(y = last_price, color = 'r', linestyle = '-',axes=ax)
ax.set_xlabel('Day')
ax.set_ylabel('Price')

Related

Python bot for trading

import yfinance as yf
import pandas as pd
dataF = yf.download("EURUSD=X", start="2022-12-22", end="2022-12-24", interval='60m')
print(dataF.iloc[:])
def signal_generator(df):
open = df.Open.iloc[-1]
close = df.Close.iloc[1]
one_open = df.Open.iloc[-2]
one_close = df.Close.iloc[-2]
# Bearish Pattern
if (open<=close and
one_open>one_close ):
return 1
# Bullish Pattern
elif (open>=close and
one_open<one_close
):
return 2
# No clear pattern
else:
return 0
signal = []
signal.append(0)
for i in range(1,len(dataF)):
df = dataF[i-1:i+1]
signal.append(signal_generator(df))
#signal_generator(data)
dataF["signal"] = signal
print(dataF.signal.value_counts())
in the first example seems like is working grabbing 2 candle stick but when i grab 4 as next code example it shows me an error .........
import yfinance as yf
import pandas as pd
dataF = yf.download("EURUSD=X", start="2022-12-22", end="2022-12-24", interval='60m')
print(dataF.iloc[:])
def signal_generator(df):
open = df.Open.iloc[-1]
close = df.Close.iloc[1]
one_open = df.Open.iloc[-2]
one_close = df.Close.iloc[-2]
two_open = df.Close.iloc[-3]
two_close = df.Close.iloc[-3]
three_open = df.Close.iloc[-3]
three_close = df.Close.iloc[-3]
# Bearish Pattern
if (open<=close and
one_open>one_close and
two_open<two_close and
three_open>=three_close):
return 1
# Bullish Pattern
elif (open>=close and
one_open<one_close and
two_open>two_close and
three_open<=three_close
):
return 2
# No clear pattern
else:
return 0
signal = []
signal.append(0)
for i in range(1,len(dataF)):
df = dataF[i-1:i+1]
signal.append(signal_generator(df))
#signal_generator(data)
dataF["signal"] = signal
print(dataF.signal.value_counts())
I believe the problem is in this line.......
df = dataF[i-1:i+1]

Try this and feed back.
from utils import *
import time
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")
from pandas_datareader import data as wb
tickers = ['SBUX']
start = '2022-09-01'
end = '2022-12-13'
price_data = []
for ticker in tickers:
data = yf.download(ticker, start, end)
data = data.reset_index()
prices = data.loc[:,['Date','Adj Close']]
price_data.append(prices.assign(ticker=ticker)[['ticker', 'Date', 'Adj Close']])
df = pd.concat(price_data)
df.dtypes
df.head()
df.shape
# Technical Indicators
data = df
num_training_days = int(data.shape[0]*.7)
print('Number of training days: {}. Number of test days: {}.'.format(num_training_days, data.shape[0]-num_training_days))
data['ma7'] = data['Adj Close'].rolling(window=7).mean()
data['ma21'] = data['Adj Close'].rolling(window=21).mean()
# Create exponential weighted moving average
data['26ema'] = data['Adj Close'].ewm(span=26).mean()
data['12ema'] = data['Adj Close'].ewm(span=12).mean()
data['MACD'] = (data['12ema']-data['26ema'])
# Create Bollinger Bands
data['20sd'] = data['Adj Close'].rolling(window=20).std()
data['upper_band'] = data['ma21'] + (data['20sd']*2)
data['lower_band'] = data['ma21'] - (data['20sd']*2)
# Create Exponential moving average
data['ema'] = data['Adj Close'].ewm(com=0.5).mean()
# Create Momentum
data['momentum'] = data['Adj Close']-1
dataset_TI_df = data
dataset = data
#def plot_technical_indicators(dataset, last_days):
last_days = 250
plt.figure(figsize=(16, 10), dpi=100)
shape_0 = dataset.shape[0]
xmacd_ = shape_0-last_days
dataset = dataset.iloc[-last_days:, :]
x_ = range(3, dataset.shape[0])
x_ =list(dataset.index)
# Plot first subplot
plt.subplot(2, 1, 1)
plt.plot(dataset['ma7'],label='MA 7', color='g',linestyle='--')
plt.plot(dataset['Adj Close'],label='Closing Price', color='b')
plt.plot(dataset['ma21'],label='MA 21', color='r',linestyle='--')
plt.plot(dataset['upper_band'],label='Upper Band', color='c')
plt.plot(dataset['lower_band'],label='Lower Band', color='c')
plt.fill_between(x_, dataset['lower_band'], dataset['upper_band'], alpha=0.35)
plt.title('Technical indicators for Starbucks - last {} days.'.format(last_days))
plt.legend()
# Plot second subplot
plt.subplot(2, 1, 2)
plt.title('MACD')
plt.plot(dataset['MACD'],label='MACD', linestyle='-.')
plt.hlines(15, xmacd_, shape_0, colors='g', linestyles='--')
plt.hlines(-15, xmacd_, shape_0, colors='g', linestyles='--')
# plt.plot(dataset['log_momentum'],label='Momentum', color='b',linestyle='-')
plt.legend()
plt.show()
# Trade Signals
signalBuy = []
signalSell = []
position = False
for i in range(len(data)):
if data['ma7'][i] > data['ma21'][i]:
if position == False :
signalBuy.append(data['Adj Close'][i])
signalSell.append(np.nan)
position = True
else:
signalBuy.append(np.nan)
signalSell.append(np.nan)
elif data['ma7'][i] < data['ma21'][i]:
if position == True:
signalBuy.append(np.nan)
signalSell.append(data['Adj Close'][i])
position = False
else:
signalBuy.append(np.nan)
signalSell.append(np.nan)
else:
signalBuy.append(np.nan)
signalSell.append(np.nan)
data['Buy_Signal_price'] = signalBuy
data['Sell_Signal_price'] = signalSell
data
# Plotting Buy and Sell Points
fig, ax = plt.subplots(figsize=(14,8))
ax.plot(data['Adj Close'] , label = 'stock' ,linewidth=0.5, color='blue', alpha = 0.9)
ax.plot(data['ma7'], label = 'ma7', alpha = 0.85)
ax.plot(data['ma21'], label = 'ma21' , alpha = 0.85)
ax.scatter(data.index , data['Buy_Signal_price'] , label = 'Buy' , marker = '^', color = 'green',alpha =1 )
ax.scatter(data.index , data['Sell_Signal_price'] , label = 'Sell' , marker = 'v', color = 'red',alpha =1 )
ax.set_title(" Price History with buy and sell signals",fontsize=10, backgroundcolor='blue', color='white')
ax.set_xlabel(f'{startdate} - {end_date}' ,fontsize=18)
ax.set_ylabel('Close Price INR (₨)' , fontsize=18)
legend = ax.legend()
ax.grid()
plt.tight_layout()
plt.show()
moving_average_window = 30
data = df
# readjusting data Frame
data = data[["Adj Close"]]
# creating ** moving average
data["ma20"] = data["Adj Close"].rolling(window=moving_average_window).mean()
#calculating daily returns
data["daily returns"] = np.log(data["Adj Close"] / data["Adj Close"].shift(1))
data["position"] = [0] * len(data)
data.reset_index(inplace=True)
data = data.drop(["index"], axis=1)
pos_exit = False
pos = "N"
std = round(data["daily returns"].std(),4)
mean = round(data["daily returns"].mean(),4)
print("Std on daily returns :", std)
print("Mean on daily returns :", mean,"\n")
print(data.head(7))
# Event Driven Testing
for i in range(1, len(data)):
# Signal to go short and reset position
if pos_exit:
pos_exit = False
pos = "N"
continue
# going long, if return goes beyond lower bound
# (1 standard deviation). The asumption here is
# that the stock will revert back to its mean value
if data["Adj Close"][i] < ((1 - std) * data["ma20"][i]):
data.at[i, "position"] = 1
pos = "L"
# scenario if return in between lower and upper bounds
if pos == "L":
data.at[i, "position"] = 1
# updating strategy returns
data["strategy returns"] = data["daily returns"] * data["position"]
# exiting if the strategy return drops by 3%
if data["strategy returns"][i] < -0.03:
data.at[i, "position"] = 0
pos_exit = True
data.tail(10)
# taking positions after one day of signals being generated
data["position"].shift(1)
print("Buy and hold returns =",round(list(data["daily returns"].cumsum())[-1],4)*100,"%")
print("Strategy returns =", round(list(data["strategy returns"].cumsum())[-1],4)*100,"%")

Matplotlib live data animation runs out of memory

I'm using a Raspberry Pi to plot live data from serial, but eventually run out of memory. I'm not sure if/how I can close the figure, but still have a live data display.
Would it be possible to create and close a new figure with every animate?
My code at the moment:
import serial
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('TkAgg') #comment out for debugging
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import gc
# Create figure for plotting
fig = plt.figure()
xs = []
ysAC = []
ysDC = []
ser = serial.Serial('/dev/ttyUSB0', 115200, timeout=1)
ser.flush()
# This function is called periodically from FuncAnimation
def animate(i, xs, ysAC, ysDC):
values = getValues()
wAC = values[1]
wDC = values[2]
# Add x and y to lists
xs.append(i)
ysAC.append(wAC)
ysDC.append(wDC)
# Limit x and y lists to 10 items
xs = ['T-9','T-8','T-7','T-6','T-5','T-4','T-3','T-2','T-1','Now']
ysDC = ysDC[-10:]
ysAC = ysAC[-10:]
# Draw x and y lists
axRT1.clear()
if len(ysDC) == 10:
lineAC, = axRT1.plot(xs, ysAC, 'b:', label='Mains', linewidth = 4)
lineDC, = axRT1.plot(xs, ysDC, 'g--', label='Solar', linewidth = 4)
gc.collect()
#fig.clf()
#plt.close()
def getValues():
if ser.in_waiting > 0:
line = ser.readline().decode('utf-8').rstrip()
return list(line.split(","))
# Set up plot to call animate() function periodically
ani = animation.FuncAnimation(fig, animate, fargs=(xs, ysAC, ysDC), interval=1000, blit=False)
plt.get_current_fig_manager().full_screen_toggle()
plt.ioff()
plt.show()
plt.draw()

The crude way of clearing the plots marked below fixed it for me:
import time
import serial
import datetime as dt
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('TkAgg') #comment out for debugging
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import matplotlib.animation as animation
from decimal import Decimal
import pandas as pd
import numpy as np
import os.path as path
import re
import gc
import os
count = 0
# Create figure for plotting
fig = plt.figure()
fig.patch.set_facecolor('whitesmoke')
hFont = {'fontname':'sans-serif', 'weight':'bold', 'size':'12'}
xs = ['T-9','T-8','T-7','T-6','T-5','T-4','T-3','T-2','T-1','Now']
ysTemp = []
ysAC = []
ysDC = []
axRT1 = fig.add_subplot(2, 2, 1)
axRT2 = axRT1.twinx() # instantiate a second axes that shares the same x-axis
#Draw x and y lists
axRT1.clear()
axRT2.clear()
axRT1.set_ylim([0, 4])
axRT2.set_ylim([10, 70])
axRT1.set_ylabel('Power Consumption kW', **hFont)
axRT2.set_ylabel('Temperature C', **hFont)
axRT1.set_xlabel('Seconds', **hFont)
axRT1.set_title('Power Consumption and Temperature - Real Time', **hFont)
lineTemp, = axRT2.plot([], [], 'r', label='Temp', linewidth = 4)
lineAC, = axRT1.plot([], [], 'b:', label='Mains', linewidth = 4)
lineDC, = axRT1.plot([], [], 'g--', label='Solar', linewidth = 4)
fig.legend([lineAC, lineDC,lineTemp], ['Mains', 'Solar', 'Temp'], fontsize=20)
ser = serial.Serial('/dev/ttyUSB0', 115200, timeout=1)
ser.flush()
# This function is called periodically from FuncAnimation
def animate(i, xs, ysTemp, ysAC, ysDC):
values = getValues()
if values != 0:
temp_c = Decimal(re.search(r'\d+',values[3]).group())
if temp_c < 0: temp_c = 0
wAC = round(Decimal(re.search(r'\d+', values[1]).group())/1000, 2)
if wAC < 0.35: wAC = 0
aDC = float(re.search(r'\d+', values[2]).group()) #remove characters
vDC = float(re.search(r'\d+', values[4][:5]).group()) #remove characters
wDC = aDC * vDC
wDC = round(abs(Decimal(wDC))/1000, 2)
# Add x and y to lists
ysTemp.append(temp_c)
ysAC.append(wAC)
ysDC.append(wDC)
# Limit x and y lists to 10 items
ysTemp = ysTemp[-10:]
ysDC = ysDC[-10:]
ysAC = ysAC[-10:]
if len(ysTemp) == 10:
axRT2.lines = [] #This crude way of clearing the plots worked
axRT1.lines =[] #This crude way of clearing the plots worked
lineTemp, = axRT2.plot(xs, ysTemp, 'r', label='Temp', linewidth = 4)
lineAC, = axRT1.plot(xs, ysAC, 'b:', label='Mains', linewidth = 4)
lineDC, = axRT1.plot(xs, ysDC, 'g--', label='Solar', linewidth = 4)
def getValues():
measureList = 0
if ser.in_waiting > 0:
line = ser.readline().decode('utf-8').rstrip()
print(line)
if line.count(',') == 4:
measureList = list(line.split(","))
return measureList
# Set up plot to call animate() function periodically
ani = animation.FuncAnimation(fig, animate, fargs=(xs, ysTemp, ysAC, ysDC), interval=1000, blit=False)
plt.get_current_fig_manager().full_screen_toggle()
plt.ioff()
plt.show()
plt.draw()

3d scatter plot animation with blitting

I want to make a 3d line plot animation with a point at the last point plotted, like a trajectory for a particule. In a case without blitting, I did it with a scatter plot and a line plot3d, updating both plots in the update function. I tried doing it in the same way, with blitting, but it throws the following error
TypeError: 'Path3DCollection' object is not iterable
My code is the following
import matplotlib.pyplot as m
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
def ranstep(start,step,stop):
numberlist = int((stop - start)/step + 1)
lista = [0]*numberlist
i = 0
while i <= numberlist:
if i == 0:
lista[i] = start
i = i + 1
if i != 0 and i < numberlist - 1:
lista[i] = lista[i - 1] + step
i = i + 1
if i != 0 and i == numberlist - 1:
lista[i] = stop
break
return(lista)
k1 = 10.0
k2 = 28.0
k3 = 8.0/3.0
czeroA = 1
czeroB = 0
czeroC = 0
step = 0.001
tfinal = 100
time = list(ranstep(0,step,tfinal))
cA = [0]*len(time)
cB = [0]*len(time)
cC = [0]*len(time)
i = 0
while i <= len(time) - 1:
if i == 0:
cA[i] = czeroA
cB[i] = czeroB
cC[i] = czeroC
i = i + 1
else:
cA[i] = cA[i - 1] + k1*(cB[i - 1] - cA[i - 1])*step
cB[i] = cB[i - 1] + (cA[i - 1]*(k2 - cC[i - 1]) - cB[i-1])*step
cC[i] = cC[i - 1] + (cA[i - 1]*cB[i-1] - k3*cC[i-1])*step
i = i + 1
fig, ax = m.subplots()
ax = fig.add_subplot(111, projection='3d')
line, = ax.plot3D([], [], [], c = 'blue', lw = 1)
line, = ax.scatter([],[],[], c = 'red', s = 30)
def init():
ax.set_xlim([-20,20])
ax.set_ylim([-20,20])
ax.set_zlim([0,50])
ax.set_xlabel('$x(t)$')
ax.set_ylabel('$y(t)$')
ax.set_zlabel('$z(t)$')
return line,
def update(frame):
line.set_data(cA[0:frame*50],cB[0:frame*50])
line.set_3d_properties(cC[0:frame*50])
line.set_data(cA[frame*500,frame*500 + 1],cB[frame*500,frame*500 + 1])
line.set_3d_properties(cC[frame*500,frame*500 + 1])
return line,
ani = animation.FuncAnimation(fig, update,interval=5, frames = len(time),init_func=init, blit=True)
m.show()

Splittig data in python dataframe and getting the array values automatically

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('D:\ history/segment.csv')
data = pd.DataFrame(data)
data = data.sort_values(['Prob_score'], ascending=[False])
one = len(data)
actualpaid_overall = len(data.loc[data['paidstatus'] == 1])
data_split = np.array_split(data, 10)
data1 = data_split[0]
actualpaid_ten = len(data1.loc[data1['paidstatus'] == 1])
percent_ten = actualpaid_ten/actualpaid_overall
data2 = data_split[1]
actualpaid_twenty = len(data2.loc[data2['paidstatus'] == 1])
percent_twenty = (actualpaid_twenty/actualpaid_overall) + percent_ten
data3 = data_split[2]
actualpaid_thirty = len(data3.loc[data3['paidstatus'] == 1])
percent_thirty = (actualpaid_thirty/actualpaid_overall) + percent_twenty
data4 = data_split[3]
actualpaid_forty = len(data4.loc[data4['paidstatus'] == 1])
percent_forty = (actualpaid_forty/actualpaid_overall) + percent_thirty
data5 = data_split[4]
actualpaid_fifty = len(data5.loc[data5['paidstatus'] == 1])
percent_fifty = (actualpaid_fifty/actualpaid_overall) + percent_forty
data6 = data_split[5]
actualpaid_sixty = len(data6.loc[data6['paidstatus'] == 1])
percent_sixty = (actualpaid_sixty/actualpaid_overall) + percent_fifty
data7 = data_split[6]
actualpaid_seventy = len(data7.loc[data7['paidstatus'] == 1])
percent_seventy = (actualpaid_seventy/actualpaid_overall) + percent_sixty
data8 = data_split[7]
actualpaid_eighty = len(data8.loc[data8['paidstatus'] == 1])
percent_eighty = (actualpaid_eighty/actualpaid_overall) + percent_seventy
data9 = data_split[8]
actualpaid_ninenty = len(data9.loc[data9['paidstatus'] == 1])
percent_ninenty = (actualpaid_ninenty/actualpaid_overall) + percent_eighty
data10 = data_split[9]
actualpaid_hundred = len(data10.loc[data10['paidstatus'] == 1])
percent_hundred = (actualpaid_hundred/actualpaid_overall) + percent_ninenty
array_x = [10,20,30,40,50,60,70,80,90,100]
array_y = [ percent_ten, percent_twenty, percent_thirty, percent_forty,percent_fifty, percent_sixty, percent_seventy, percent_eighty, percent_ninenty, percent_hundred]
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
The above is my python code i have splitted my dataframe into 10 equal sections and plotted the graph but I'm not satisfied with this i have two concerns:
array_x = [10,20,30,40,50,60,70,80,90,100] in this line of code i have manually taken the x values, is there any possible way to process automatically as i have taken split(data,10) it should show 10 array values
As we can see the whole data1,2,3,4...10 is being repeated again and again is there a solution to write this in a function or loop.
Any help with codes will be appreciated. Thanks

I believe you need list comprehension and for count is possible use simplier way - sum of boolean mask, True values are processes like 1, then convert list to numpy array and use numpy.cumsum:
data = pd.read_csv('D:\ history/segment.csv')
data = data.sort_values('Prob_score', ascending=False)
one = len(data)
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
plt.xlabel(' Base')
plt.ylabel(' percent')
ax = plt.plot(array_x,array_y)
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='0.1')
plt.grid( which='both', axis = 'both', linewidth=0.5,color='0.75')
Sample:
np.random.seed(2019)
N = 1000
data = pd.DataFrame({'paidstatus':np.random.randint(3, size=N),
'Prob_score':np.random.randint(100, size=N)})
#print (data)
data = data.sort_values(['Prob_score'], ascending=[False])
actualpaid_overall = (data['paidstatus'] == 1).sum()
data_split = np.array_split(data, 10)
x = [len(x) for x in data_split]
y = [(x['paidstatus'] == 1).sum()/actualpaid_overall for x in data_split]
array_x = np.cumsum(np.array(x))
array_y = np.cumsum(np.array(y))
print (array_x)
[ 100 200 300 400 500 600 700 800 900 1000]
print (array_y)
[0.09118541 0.18844985 0.27963526 0.38601824 0.49848024 0.61702128
0.72036474 0.81155015 0.9331307 1. ]

Very slow plot with Matlpotlib

Can anybody help how to optimize the plot function in python? I use Matplotlib to plot financial data.Here small function for plotting OHLC data. The time increase significantly if I add indicators or other data.
import numpy as np
import datetime
from matplotlib.collections import LineCollection
from pylab import *
import urllib2
def test_plot(OHLCV):
bar_width = 1.3
date_offset = 0.5
fig = figure(figsize=(50, 20), facecolor='w')
ax = fig.add_subplot(1, 1, 1)
labels = ax.get_xmajorticklabels()
setp(labels, rotation=0)
month = MonthLocator()
day = DayLocator()
timeFmt = DateFormatter('%Y-%m-%d')
colormap = OHLCV[:,1] < OHLCV[:,4]
color = np.zeros(colormap.__len__(), dtype = np.dtype('|S5'))
color[:] = 'red'
color[np.where(colormap)] = 'green'
dates = date2num( OHLCV[:,0])
lines_hl = LineCollection( zip(zip(dates, OHLCV[:,2]), zip(dates, OHLCV[:,3])))
lines_hl.set_color(color)
lines_hl.set_linewidth(bar_width)
lines_op = LineCollection( zip(zip((np.array(dates) - date_offset).tolist(), OHLCV[:,1]), zip((np.array(dates)).tolist(), parsed_table[:,1])))
lines_op.set_color(color)
lines_op.set_linewidth(bar_width)
lines_cl = LineCollection( zip(zip((np.array(dates) + date_offset).tolist(), OHLCV[:,4]), zip((np.array(dates)).tolist(), parsed_table[:,4])))
lines_cl.set_color(color)
lines_cl.set_linewidth(bar_width)
ax.add_collection(lines_hl, autolim=True)
ax.add_collection(lines_cl, autolim=True)
ax.add_collection(lines_op, autolim=True)
ax.xaxis.set_major_locator(month)
ax.xaxis.set_major_formatter(timeFmt)
ax.xaxis.set_minor_locator(day)
ax.autoscale_view()
ax.xaxis.grid(True, 'major')
ax.grid(True)
ax.set_title('EOD test plot')
ax.set_xlabel('Date')
ax.set_ylabel('Price , $')
fig.savefig('test.png', dpi = 50, bbox_inches='tight')
close()
if __name__=='__main__':
data_table = urllib2.urlopen(r"http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2012&d=00&e=15&f=2013&g=d&ignore=.csv").readlines()[1:][::-1]
parsed_table = []
#Format: Date, Open, High, Low, Close, Volume
dtype = (lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date(),float, float, float, float, int)
for row in data_table:
field = row.strip().split(',')[:-1]
data_tmp = [i(j) for i,j in zip(dtype, field)]
parsed_table.append(data_tmp)
parsed_table = np.array(parsed_table)
import time
bf = time.time()
count = 100
for i in xrange(count):
test_plot(parsed_table)
print('Plot time: %s' %(time.time() - bf) / count)
The result is something like this. Average time execution on each plot is aproximately 2.6s. Charting in R is much faster, but I didn't measure the performance and I don't want use Rpy, so I bielive that my code is inefficient.

This solution reuses a Figure instance and saves plots asynchronously. You could change this to have as many figures as there are processors, do that many plots asynchronously, and it should speed things up even more. As it is, this takes ~1s per plot, down from 2.6 on my machine.
import numpy as np
import datetime
import urllib2
import time
import multiprocessing as mp
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from pylab import *
from matplotlib.collections import LineCollection
class AsyncPlotter():
def __init__(self, processes=mp.cpu_count()):
self.manager = mp.Manager()
self.nc = self.manager.Value('i', 0)
self.pids = []
self.processes = processes
def async_plotter(self, nc, fig, filename, processes):
while nc.value >= processes:
time.sleep(0.1)
nc.value += 1
print "Plotting " + filename
fig.savefig(filename)
plt.close(fig)
nc.value -= 1
def save(self, fig, filename):
p = mp.Process(target=self.async_plotter,
args=(self.nc, fig, filename, self.processes))
p.start()
self.pids.append(p)
def join(self):
for p in self.pids:
p.join()
class FinanceChart():
def __init__(self, async_plotter):
self.async_plotter = async_plotter
self.bar_width = 1.3
self.date_offset = 0.5
self.fig = plt.figure(figsize=(50, 20), facecolor='w')
self.ax = self.fig.add_subplot(1, 1, 1)
self.labels = self.ax.get_xmajorticklabels()
setp(self.labels, rotation=0)
line_hl = LineCollection(([[(734881,1), (734882,5), (734883,9), (734889,5)]]))
line_op = LineCollection(([[(734881,1), (734882,5), (734883,9), (734889,5)]]))
line_cl = LineCollection(([[(734881,1), (734882,5), (734883,9), (734889,5)]]))
self.lines_hl = self.ax.add_collection(line_hl, autolim=True)
self.lines_op = self.ax.add_collection(line_cl, autolim=True)
self.lines_cl = self.ax.add_collection(line_op, autolim=True)
self.ax.set_title('EOD test plot')
self.ax.set_xlabel('Date')
self.ax.set_ylabel('Price , $')
month = MonthLocator()
day = DayLocator()
timeFmt = DateFormatter('%Y-%m-%d')
self.ax.xaxis.set_major_locator(month)
self.ax.xaxis.set_major_formatter(timeFmt)
self.ax.xaxis.set_minor_locator(day)
def test_plot(self, OHLCV, i):
colormap = OHLCV[:,1] < OHLCV[:,4]
color = np.zeros(colormap.__len__(), dtype = np.dtype('|S5'))
color[:] = 'red'
color[np.where(colormap)] = 'green'
dates = date2num( OHLCV[:,0])
date_array = np.array(dates)
xmin = min(dates)
xmax = max(dates)
ymin = min(OHLCV[:,1])
ymax = max(OHLCV[:,1])
self.lines_hl.set_segments( zip(zip(dates, OHLCV[:,2]), zip(dates, OHLCV[:,3])))
self.lines_hl.set_color(color)
self.lines_hl.set_linewidth(self.bar_width)
self.lines_op.set_segments( zip(zip((date_array - self.date_offset).tolist(), OHLCV[:,1]), zip(date_array.tolist(), OHLCV[:,1])))
self.lines_op.set_color(color)
self.lines_op.set_linewidth(self.bar_width)
self.lines_cl.set_segments( zip(zip((date_array + self.date_offset).tolist(), OHLCV[:,4]), zip(date_array.tolist(), OHLCV[:,4])))
self.lines_cl.set_color(color)
self.lines_cl.set_linewidth(self.bar_width)
self.ax.set_xlim(xmin,xmax)
self.ax.set_ylim(ymin,ymax)
self.ax.xaxis.grid(True, 'major')
self.ax.grid(True)
self.async_plotter.save(self.fig, '%04i.png'%i)
if __name__=='__main__':
print "Starting"
data_table = urllib2.urlopen(r"http://ichart.finance.yahoo.com/table.csv?s=IBM&a=00&b=1&c=2012&d=00&e=15&f=2013&g=d&ignore=.csv").readlines()[1:][::-1]
parsed_table = []
#Format: Date, Open, High, Low, Close, Volume
dtype = (lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date(),float, float, float, float, int)
for row in data_table:
field = row.strip().split(',')[:-1]
data_tmp = [i(j) for i,j in zip(dtype, field)]
parsed_table.append(data_tmp)
parsed_table = np.array(parsed_table)
import time
bf = time.time()
count = 10
a = AsyncPlotter()
_chart = FinanceChart(a)
print "Done with startup tasks"
for i in xrange(count):
_chart.test_plot(parsed_table, i)
a.join()
print('Plot time: %.2f' %(float(time.time() - bf) / float(count)))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How can I plot a histogram using pandas? - python

Related

Python bot for trading

Matplotlib live data animation runs out of memory

3d scatter plot animation with blitting

Splittig data in python dataframe and getting the array values automatically

Very slow plot with Matlpotlib

Categories

Resources