I have just started learning matplotlib. I am trying to plot stock data from the Yahoo chart API. I tried the program below, but it is not working. Here is my program:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Question code as posted: load a CSV of daily quotes and plot one column against the date.
# NOTE(review): the indentation of the function body was lost in this copy.
def graph():
# Column 0 is converted with strpdate2num('%Y-%m-%d'); the loaded columns are
# unpacked into six names. No rows are skipped before parsing.
date, closep, highp, lowp, openp, valuep = np.loadtxt('/home/najeeb/Desktop/table.csv', delimiter=',', unpack=True,
converters={0: mdates.strpdate2num('%Y-%m-%d')})
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1, axisbg='w')
# NOTE(review): `value` is not defined in this function; the unpacked name is `valuep`.
plt.plot_date(x=date, y=value, fmt='-')
plt.title('title')
plt.ylabel('value')
plt.xlabel('date')
plt.show()
graph()
Here is the CSV file.
Please guide me on how to solve this problem, and tell me whether there is another way to plot a stock graph. Thank you.
The CSV file looked like this:
Date,Open,High,Low,Close,Volume,Adj Close
2014-10-17,97.50,99.00,96.81,97.67,68032200,97.67
2014-10-16,95.55,97.72,95.41,96.26,72110700,96.26
2014-10-15,97.97,99.15,95.18,97.54,100875400,97.54
Your code np.loadtxt() was trying to parse the header 'Date' as a date, which didn't work because that string wasn't a valid date value, so I used skiprows=1 to skip the header.
The other problem was, the CSV has 7 columns, and in your tuple you were unpacking only 6 values
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import urllib2
url = 'http://ichart.finance.yahoo.com/table.csv?s=AAPL&d=9&e=14&f=2008&g=d&a=8&b=7&c=1984&ignore=.csv'


def graph():
    """Download the CSV at ``url`` and plot its adjusted close against the date.

    The header row is skipped, column 0 is parsed as a ``%Y-%m-%d`` date and
    all seven columns of the file are unpacked.
    """
    # The price columns use a `_p` suffix so the builtin `open` is not shadowed.
    date, open_p, high_p, low_p, close_p, volume, adj_close = np.loadtxt(
        urllib2.urlopen(url),
        skiprows=1,  # the first row is the header, not data
        delimiter=',',
        unpack=True,
        converters={0: mdates.strpdate2num('%Y-%m-%d')},
    )
    fig = plt.figure()
    # Create the axes with a white background; pyplot draws on it as the current axes.
    fig.add_subplot(1, 1, 1, axisbg='w')
    plt.plot_date(x=date, y=adj_close, fmt='-')
    plt.title('Apple, 1984 to 2008')
    plt.ylabel('Adjusted close')
    plt.xlabel('Date')
    plt.show()


graph()
Related
My goal is to create an anomaly graph for a stock, using its dates and closing prices. I tried to create outliers, but the resulting points are not where I want them. For example, I want the outliers to be in 2019 and after 2020, where there are drastic changes. The x-axis holds dates, and I don't know how to specify the outliers by date.
I thought of writing y["2019"]=40, for example, but it doesn't do anything.
from pandas import read_csv
from matplotlib import pyplot
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
#from IPython.core.debugger import set_trace
#import data
# Load the quotes; x is the 'Date' column parsed to datetimes, y is the 'Close/Last' column.
AAPL= pd.read_csv('AAPL.csv', header=0, squeeze=True)
x=AAPL['Date']
x=pd.to_datetime(x)
y=AAPL['Close/Last']
# First figure: closing price over time.
plt.figure(figsize=(15,7))
plt.plot(x, y, label="Close")
plt.title("AAPL")
plt.xlabel("Time")
plt.ylabel("Close")
plt.xticks(rotation=0)
plt.grid()
plt.show()
# Overwrite the entries of y at index 5, 60 and 85 to act as artificial outliers.
# These are addressed by row index, not by date.
y[5] = 5
y[60] =55
y[85] = 1.4
# NOTE(review): n_outliers is assigned but not used in the visible code.
n_outliers = 3
# Second figure: the modified series drawn as a line plus scatter markers.
plt.figure(figsize=(15,7))
plt.plot(x,y)
plt.scatter(x,y)
plt.grid()
plt.ylabel('Y')
plt.xlabel('x')
plt.show()
Thank you in advance
Using matplotlib and mpl_finance to plot candlesticks. Data is in csv AAPL.
I want to show the x-axis as year and month only, i.e."yyyy-mmm", so:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
import matplotlib.dates as mdates
# Read the tab-delimited quotes, sort by 'Date' ascending and keep the last 100 rows.
data = pd.read_csv('C:\\AAPL.csv', delimiter = "\t")
data = data.sort_values(['Date'], ascending=True)
data = data.tail(100)
fig = plt.figure(figsize=(6,4))
plt.ylim(60, 200)
ax1 = fig.add_subplot(111)
cl =candlestick2_ohlc(ax=ax1,opens=data['Open'],highs=data['High'],lows=data['Low'],closes=data['Close'],width=0.6)
# One tick per row at integer positions 0..len(data)-1, labelled with that row's 'Date' value.
ax1.set_xticks(np.arange(len(data)))
ax1.set_xticklabels(data['Date'], fontsize=10, rotation=90)
# every month of the year like 2008-Jan, 2008-Feb...
# NOTE(review): the ticks above are integer positions, while MonthLocator and
# DateFormatter operate on date values -- probably why no labels appear; confirm.
locator = mdates.MonthLocator()
fmt = mdates.DateFormatter('%Y-%b')
X = plt.gca().xaxis
X.set_major_locator(locator)
X.set_major_formatter(fmt)
plt.show()
It doesn't show anything.
I also tried the following, but it doesn't work either:
# Alternative attempt: month/day/year tick labels with a day-level locator on the current axes.
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator())
How can I have the x-axis only show the year and month??
Thank you.
Try following solution,
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates
from matplotlib.dates import date2num, DayLocator, DateFormatter

# Raw string: '\A' is not a valid escape sequence, so keep the backslash literal.
data = pd.read_csv(r'C:\AAPL.csv')
# Parse the dates before sorting so the order is chronological, not lexical.
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values(['Date'], ascending=True)
data = data.tail(100)
# candlestick_ohlc expects matplotlib date numbers in the first column.
data['Date'] = date2num(data['Date'].tolist())

fig, ax = plt.subplots(figsize=(10, 10))
# Pass exactly (date, open, high, low, close), in that order.
# `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
quotes = data[['Date', 'Open', 'High', 'Low', 'Close']].values
candlestick_ohlc(ax, quotes, width=0.6)
ax.set(xlabel='AAPL')
# One major tick every fourth week, labelled as year-month (e.g. 2018-Jan).
ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=4))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%b'))
plt.show()
Note: I have used candlestick_ohlc instead of candlestick2_ohlc.
Output :
I'm trying to adjust the formatting of the date tick labels of the x-axis so that it only shows the Year and Month values. From what I've found online, I have to use mdates.DateFormatter, but it's not taking effect at all with my current code as is. Anyone see where the issue is? (the dates are the index of the pandas Dataframe)
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
# Set a year-month formatter on the x-axis, then draw a pandas bar plot of one
# column on the same axes and finally mark the x-axis as a date axis.
# NOTE(review): basicDF is not defined in this snippet.
fig = plt.figure(figsize = (10,6))
ax = fig.add_subplot(111)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
basicDF['some_column'].plot(ax=ax, kind='bar', rot=75)
ax.xaxis_date()
Reproducible scenario code:
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
# Reproducible case: 20 timestamps generated with freq='m' index a frame of random values.
rng = pd.date_range('1/1/2014', periods=20, freq='m')
blah = pd.DataFrame(data = np.random.randn(len(rng)), index=rng)
fig = plt.figure(figsize = (10,6))
ax = fig.add_subplot(111)
# The formatter is set before the pandas bar plot is drawn on the same axes.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
blah.plot(ax=ax, kind='bar')
ax.xaxis_date()
Still can't get just the year and month to show up.
If I set the format after .plot , get an error like this:
ValueError: DateFormatter found a value of x=0, which is an illegal date. This usually occurs because you have not informed the axis that it is plotting dates, e.g., with ax.xaxis_date().
It's the same whether I put it before or after ax.xaxis_date().
pandas just doesn't work well with custom date-time formats.
You need to just use raw matplotlib in cases like this.
import numpy
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas
# Reproducible sample: N random values indexed by N dates generated with freq='m'.
N = 20
numpy.random.seed(N)
dates = pandas.date_range('1/1/2014', periods=N, freq='m')
samples = numpy.random.randn(N)
df = pandas.DataFrame({'A': samples}, index=dates)

# Plain matplotlib bar chart with year-month tick labels.
fig, ax = plt.subplots(figsize=(10, 6))
year_month = mdates.DateFormatter('%Y-%m')
ax.xaxis.set_major_formatter(year_month)
ax.bar(df.index, df['A'], width=25, align='center')
And that gives me:
Solution with pandas only
You can create nicely formatted ticks by using the DatetimeIndex and taking advantage of the datetime properties of the timestamps. Tick locators and formatters from matplotlib.dates are not necessary for a case like this unless you would want dynamic ticks when using the interactive interface of matplotlib for zooming in and out (more relevant for time ranges longer than in this example).
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
# Create sample time series with month-end frequency ('m'), plot it with a pandas bar chart
rng = np.random.default_rng(seed=1)  # random number generator
dti = pd.date_range('1/1/2014', periods=20, freq='m')
df = pd.DataFrame(data=rng.normal(size=dti.size), index=dti)
ax = df.plot.bar(figsize=(10,4), legend=None)
# Set major ticks and tick labels: month abbreviation on every bar, plus the year
# on the first bar and on each bar whose year differs from the previous one.
ax.set_xticks(range(df.index.size))
ax.set_xticklabels([
    # `idx == 0` is checked explicitly: df.index[idx-1] would otherwise wrap
    # around to the last timestamp and hide the year when all data share one year.
    ts.strftime('%b\n%Y') if idx == 0 or ts.year != df.index[idx-1].year
    else ts.strftime('%b')
    for idx, ts in enumerate(df.index)
])
ax.figure.autofmt_xdate(rotation=0, ha='center');
The accepted answer claims that "pandas won't work well with custom date-time formats", but you can make use of pandas' to_datetime() function to use your existing datetime Series in the dataframe:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import pandas as pd
# Twenty dates generated with freq='m'; the frame's index is passed through pd.to_datetime().
rng = pd.date_range('1/1/2014', periods=20, freq='m')
noise = np.random.randn(len(rng))
datetime_index = pd.to_datetime(rng)
blah = pd.DataFrame(data=noise, index=datetime_index)

# Bar chart on plain matplotlib axes with month-year tick labels.
fig, ax = plt.subplots()
month_year = DateFormatter('%m-%Y')
ax.xaxis.set_major_formatter(month_year)
ax.bar(blah.index, blah[0], width=25, align='center')
Will result in:
You can see the different available formats here.
I ran into the same problem and used a workaround: transforming the index from datetime format into the desired string format:
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
rng = pd.date_range('1/1/2014', periods=20, freq='m')
blah = pd.DataFrame(data=np.random.randn(len(rng)), index=rng)
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
# Transform the index to strings according to the format "%Y-%m".
# With a plain string index, the pandas bar plot shows these labels as-is.
blah_test = blah.copy()
str_index = blah.index.strftime('%Y-%m').tolist()
blah_test.index = str_index
blah_test.plot(ax=ax, kind='bar', rot=45)
plt.show()
which results in the following figure:
I am taking my first steps with pandas.
After a few successful steps I got stuck on the following task: displaying data as OHLC bars.
I downloaded data for Apple stock from Google Finance and stored it to *.csv file.
After a lot of search I wrote the following code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
from matplotlib.finance import candlestick_ohlc
#read stored data
#First two lines of csv:
#Date,Open,High,Low,Close
#2010-01-04,30.49,30.64,30.34,30.57
data = pd.read_csv("AAPL.csv")
#graph settings
fig, ax = plt.subplots()
ax.xaxis_date()
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
plt.xlabel("Date")
plt.ylabel("Price")
plt.title("AAPL")
#convert date to float format
# Each 'Date' string is parsed with "%Y-%m-%d" and converted to a matplotlib date number.
data['Date2'] = data['Date'].map(lambda d: mdates.date2num(dt.datetime.strptime(d, "%Y-%m-%d")))
# NOTE(review): candlestick_ohlc is handed a single tuple of five whole columns here.
candlestick_ohlc(ax, (data['Date2'], data['Open'], data['High'], data['Low'], data['Close']))
plt.show()
But it displays empty graph.
What is wrong with this code?
Thanks.
You need to change the last line so that candlestick_ohlc receives one (date, open, high, low, close) tuple per day instead of a tuple of whole columns. The following code:
# Fetch AAPL quotes starting 2015-07-01 and turn the index into a regular column.
start = dt.datetime(2015, 7, 1)
data = pd.io.data.DataReader('AAPL', 'yahoo', start).reset_index()


def _to_datenum(stamp):
    """Return the matplotlib date number for one timestamp."""
    return mdates.date2num(stamp.to_pydatetime())


data['Date2'] = data['Date'].apply(_to_datenum)

# One (date, open, high, low, close) tuple per row, as candlestick_ohlc is given below.
ohlc_columns = ['Date2', 'Open', 'High', 'Low', 'Close']
tuples = list(map(tuple, data[ohlc_columns].values))

fig, ax = plt.subplots()
ax.xaxis_date()
date_format = mdates.DateFormatter("%Y-%m-%d")
ax.xaxis.set_major_formatter(date_format)
plt.xticks(rotation=45)
plt.title("AAPL")
plt.xlabel("Date")
plt.ylabel("Price")
candlestick_ohlc(ax, tuples, width=.6, colorup='g', alpha=.4);
Produces the below plot:
which you can further tinker with.
My data looks as follows:
2012021305, 65217
2012021306, 82418
2012021307, 71316
2012021308, 66833
2012021309, 69406
2012021310, 76422
2012021311, 94188
2012021312, 111817
2012021313, 127002
2012021314, 141099
2012021315, 147830
2012021316, 136330
2012021317, 122252
2012021318, 118619
2012021319, 115763
2012021320, 121393
2012021321, 130022
2012021322, 137658
2012021323, 139363
Where the first column is the date in YYYYMMDDHH format. I'm trying to graph the data using the csv2rec module. I can get the data to graph, but the x-axis and its labels are not showing up the way that I expect them to.
import matplotlib
matplotlib.use('Agg')
from matplotlib.mlab import csv2rec
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pylab import *
# Question code as posted: read two columns from data.log and save a line plot to plot1.png.
output_image_name='plot1.png'
input_filename="data.log"
# The file is opened and closed immediately; nothing is read through this handle.
input = open(input_filename, 'r')
input.close()
data = csv2rec(input_filename, names=['time', 'count'])
rcParams['figure.figsize'] = 10, 5
rcParams['font.size'] = 8
fig = plt.figure()
# The same series is plotted twice: once through pyplot and once on `ax`.
# NOTE(review): data['time'] is plotted as loaded (values such as 2012021305);
# nothing here converts it to dates -- presumably it is numeric, confirm.
plt.plot(data['time'], data['count'])
ax = fig.add_subplot(111)
ax.plot(data['time'], data['count'])
hours = mdates.HourLocator()
# NOTE(review): in strftime-style formats %M is minutes and %m is month, %d is
# day of month; '%Y%M%D%H' is probably not the intended YYYYMMDDHH -- confirm.
fmt = mdates.DateFormatter('%Y%M%D%H')
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(fmt)
ax.grid()
plt.ylabel("Count")
plt.title("Count Log Per Hour")
fig.autofmt_xdate(bottom=0.2, rotation=90, ha='left')
plt.savefig(output_image_name)
I assume this has something to do with the date format. Any suggestions?
You need to convert the x-values to datetime objects
Something like:
from datetime import datetime

# Parse each YYYYMMDDHH value into a datetime so the x-axis is plotted as dates.
# The parser is `datetime.strptime`; `datetime` has no `strp` method.
time_vec = [datetime.strptime(str(x), '%Y%m%d%H') for x in data['time']]
plot(time_vec, data['count'])
Currently, you are telling Python to format integers (2012021305) as a date, which it does not know how to do, so it returns an empty string (although I suspect that errors are being raised somewhere).
You should also check your format string mark up.