I am trying to create a time series of sea surface temperature data over a whole year for six consecutive years and plot them using subplots. I want to mark the x-ticks as the months. I tried using the matplotlib.dates option; however, the year doesn't change on the subsequent subplots.
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays19 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2019.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays19 = np.delete(arrays19,[0,1,2,3,4,215,216,217],0)
sst14 = []
datetime1 = [] #year, month, date
datetime2 = [] #hour, min, second
for i in arrays14:
    d1 = i[0]
    d2 = i[2]
    sst1 = i[2]
    sst14.append(sst1)
    datetime1.append(d1)
    datetime2.append(d2)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
for i in arrays15:
    sst2 = i[2]
    sst15.append(sst2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
for i in arrays16:
    sst3 = i[2]
    sst16.append(sst3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
for i in arrays17:
    sst4 = i[2]
    sst17.append(sst4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
for i in arrays18:
    sst5 = i[2]
    sst18.append(sst5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
np.shape(sst18)
sst19 = []
for i in arrays19:
    sst6 = i[2]
    sst19.append(sst6)
sst19 = np.array(sst19,dtype = np.float64)
sst19_u = np.zeros(len(sst14), dtype = np.float64)
sst19_fill = np.full([118],-9.99,dtype=np.float64)
sst19_u[0:211] = sst19[0:211]
sst19_u[211:329] = sst19_fill
sst19_u[329:365] = sst19[211:247]
sst_19_m = np.ma.masked_equal(sst19_u,-9.99)
##########Plotting
new_epoch = '2016-01-01T00:00:00'
mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot(sst_14_m)
axs[1].plot(sst_15_m)
axs[2].plot(sst_16_m)
axs[3].plot(sst_17_m)
axs[4].plot(sst_18_m)
axs[5].plot(sst_19_m)
for i in range(6):
    axs[i].xaxis.set_major_locator(mdates.MonthLocator())
    axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
    axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
    #axs[i].grid(True)
    axs[i].set_ylim(bottom=25, top=32)
    axs[i].set_ylabel('SST')
plt.show()
I got an output like the following:
I would like the x-axis labels to show the corresponding year on each subplot (2016, 2017, 2018, 2019, etc.).
The data can be found in the folder - https://drive.google.com/drive/folders/1bETa7PjWKIUNS13xg3RgIMa5L7bpYn5W?usp=sharing
I love NumPy as much as the next person but this is a good use case for pandas. Pandas has the advantage of being able to label rows with more meaningful things than just positional index. For example, you can use dates. This is very convenient.
First, load your data:
import pandas as pd
import glob
dfs = []
for fname in glob.glob('./sst15n90e_dy_*.ascii'):
    df = pd.read_csv(fname, skiprows=4, delimiter=r'\s+')
    dfs.append(df)
df = pd.concat(dfs, axis=0, ignore_index=True)
Now do df.head() and you'll see this:
Let's convert that date to a 'datetime' object, and use it as the index instead of the default row numbers. We'll also deal with those -9.99 values.
import numpy as np
df['ds'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')
df = df.set_index('ds')
df = df.sort_index()
df.loc[df['SST'] == -9.99, 'SST'] = np.nan
Now you have a dataset you can do all sorts of magic with; for example, df.resample('Y')['SST'].mean() gives you the annual mean SST.
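A couple more quick examples of what the datetime index makes easy (illustrative only, assuming the 'SST' column from above):
monthly_clim = df.groupby(df.index.month)['SST'].mean()  # climatology: mean SST for each calendar month
smooth = df['SST'].rolling('7D').mean()                  # 7-day rolling mean, driven by the datetime index
print(monthly_clim)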
Anyway, now we can make plots in various ways. You can plot DataFrames directly, eg check out df.groupby(df.index.year)['SST'].plot(). Or you can use seaborn (check out the gallery!), which understands DataFrames. Or you can construct a plot with matplotlib in the usual way. For instance:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 8), sharey=True)
for ax, (year, group) in zip(axs.flat, df.groupby(df.index.year)):
    ax.plot(group['SST'])
    ax.set_title(year)
    ax.grid(c='k', alpha=0.15)
    date_form = DateFormatter("%b")
    ax.xaxis.set_major_formatter(date_form)
plt.tight_layout()
This is close to what you wanted, but with a more useful data structure and quite a bit less code:
I did some modifications and got the results as desired:
from pickletools import float8
import os
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
from datetime import datetime
# for files in os.listdir('/home/swadhin/project/sst/daily'):
# path = (files)
# print(path)
# arrays = [np.asarray(list(map(str, line.split()))) for line in open(files)]
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays08 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2008.ascii')]
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays08 = np.delete(arrays08,[0,1,2,3,4,215,216,217],0)
sst14 = []
datetime1 = [] #year, month,date
#datetime2 = [] #hour,min,second
for i in arrays14:
    d1 = i[0]
    #d2 = i[2]
    sst1 = i[2]
    sst14.append(sst1)
    datetime1.append(d1)
    #datetime2.append(d2)
#reading the data
# datetime1 = np.array(datetime1,dtype = np.float64)
# datetime2 = np.array(datetime2,dtype = np.float64)
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
sst15 = []
datetime2 = []
for i in arrays15:
    d2 = i[0]
    sst2 = i[2]
    sst15.append(sst2)
    datetime2.append(d2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
datetime3 = []
for i in arrays16:
    d3 = i[0]
    sst3 = i[2]
    sst16.append(sst3)
    datetime3.append(d3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
datetime4 = []
for i in arrays17:
    d4 = i[0]
    sst4 = i[2]
    sst17.append(sst4)
    datetime4.append(d4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
datetime5 = []
for i in arrays18:
    d5 = i[0]
    sst5 = i[2]
    sst18.append(sst5)
    datetime5.append(d5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
sst08 = []
datetime6 = []
for i in arrays08:
    d6 = i[0]
    sst6 = i[2]
    sst08.append(sst6)
    datetime6.append(d6)
sst08 = np.array(sst08,dtype = np.float64)
# sst08_u = np.zeros(len(sst14), dtype = np.float64)
# sst08_fill = np.full([118],-9.99,dtype=np.float64)
# sst08_u[0:211] = sst08[0:211]
# sst08_u[211:329] = sst08_fill
# sst08_u[329:365] = sst08[211:247]
sst_08_m = np.ma.masked_equal(sst08,-9.99)
dt = [datetime1, datetime2, datetime3, datetime4, datetime5, datetime6]  # plain list: the per-year lists can differ in length
dt_m = []
for i in dt:
    dt_m1 = []
    for j in i:
        datetime_object = datetime.strptime(j,'%Y%m%d')
        dt_m1.append(datetime_object)
    dt_m.append(dt_m1)
##########Plotting
# new_epoch = '2016-01-01T00:00:00'
# mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
axs[0].plot_date(dt_m[5],sst_08_m,'-')
axs[1].plot_date(dt_m[0],sst_14_m,'-')
axs[2].plot_date(dt_m[1],sst_15_m,'-')
axs[3].plot_date(dt_m[2],sst_16_m,'-')
axs[4].plot_date(dt_m[3],sst_17_m,'-')
axs[5].plot_date(dt_m[4],sst_18_m,'-')
for i in range(6):
    axs[i].xaxis.set_major_locator(mdates.MonthLocator())
    axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
    axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
    axs[i].grid(True)
    axs[i].set_ylim(bottom=25, top=32)
    axs[i].set_ylabel('SST')
plt.show()
And it solved the issue.
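For reference, the repeated per-year blocks above could also be collapsed into a single loop. This is just a minimal sketch assuming the same file layout as above (five header lines, date in the first column, SST in the third, -9.99 as the fill value) and the same file naming; the extra bad rows in the 2008 file (deleted above as rows 215-217) would still need the same special handling:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime

years = [2008, 2014, 2015, 2016, 2017, 2018]
series = {}
for year in years:
    fname = f'/home/swadhin/project/sst/daily/sst15n90e_dy_{year}.ascii'
    rows = [line.split() for line in open(fname)][5:]  # skip the header lines
    dates = [datetime.strptime(r[0], '%Y%m%d') for r in rows]
    sst = np.ma.masked_equal(np.array([r[2] for r in rows], dtype=np.float64), -9.99)
    series[year] = (dates, sst)

fig, axs = plt.subplots(3, 2, figsize=(12, 8), constrained_layout=True)
for ax, year in zip(axs.ravel(), years):
    dates, sst = series[year]
    ax.plot(dates, sst, '-')
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(ax.xaxis.get_major_locator()))
    ax.grid(True)
    ax.set_ylim(bottom=25, top=32)
    ax.set_ylabel('SST')
plt.show()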
I have written a script that reads HTML tables from the Outlook application; each table has 4 columns (Ticker, Level, Pkey, Date). I need to put these values into an insert query. How can I split the data frame into its individual columns so that the insert query can take its references from the different columns?
import pandas as pd
import win32com.client
import numpy as np
from sqlalchemy.engine import create_engine
import re
from datetime import datetime, timedelta
import requests
import sys
from bs4 import BeautifulSoup
from pprint import pprint
EMAIL_ACCOUNT = 'xyz'
EMAIL_SUBJ_SEARCH_STRING = ('tg')
#EMAIL_CONTNT = {'Ticker': [df.to_string()[-82:-74], 'TickerLevel': [price], 'DATE': [Date]}
out_app = win32com.client.gencache.EnsureDispatch("Outlook.Application")
out_namespace = out_app.GetNamespace("MAPI")
#lastWeekDateTime = dt.datetime.now() - dt.timedelta(days=1)
#lastWeekDateTime = lastWeekDateTime.strftime('%m/%d/%Y %H:%M %p')
root_folder = out_namespace.GetDefaultFolder(6)
out_iter_folder = root_folder.Folders['Email_Snapper']
#out_iter_folder = out_iter_folder.Restrict("[ReceivedTime] >= '" + lastWeekDateTime + "'")
item_count = out_iter_folder.Items.Count
Flag = False
cnt = 1
if item_count > 0:
    for i in range(item_count, 0, -1):
        message = out_iter_folder.Items[i]
        if EMAIL_SUBJ_SEARCH_STRING in message.Subject and cnt <= 1:
            cnt = cnt + 1
            subject = message.Subject
            if datetime.now().weekday() == 0:
                DT = datetime.strftime(datetime.now() - timedelta(days=3), '%Y-%m')
            else:
                DT = datetime.strftime(datetime.now() - timedelta(days=1), '%Y-%m')
            DT = subject[subject.find(DT): subject.find(DT) + 10]
            Date = datetime.strptime(DT, '%Y-%m-%d').strftime("'%d%b%Y'")
            print(Date)
            Body_content = message.HTMLBody
            Body_content = BeautifulSoup(Body_content,"lxml")
            html_tables = Body_content.find_all('table')[0]
            #Body_content = Body_content[:Body_content.find("Disclaimer")].strip()
            df = pd.read_html(str(html_tables),header=0)[0]
            Pkey = {'MSUSDSP5': 71763307, 'MSUSSPVP': 76366654,'MSCBICCO':137292386, 'MSCBWGSP':151971418, 'MSCBBGEC':151971419, 'MSUSEVHI':152547427, 'MSCBCTAD':152547246}
            df['Pkey'] = df['Ticker'].map(Pkey)
            df['Date'] = Date
            print(df)
            sql_query = 'call CORE_VALUATIONS.VALUATIONS.INSERTEQCLOSINGPRICE(%d, %s, %s, NULL, NULL)' % ((Pkey), Date, Level)
            print(sql_query)
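One way to get from the DataFrame to one insert call per row is to iterate over the rows and pull each column out by name. A minimal sketch, assuming the columns are named Ticker, Level, Pkey and Date as described above (in practice, passing the values as bound parameters through SQLAlchemy is safer than string formatting):
for row in df.itertuples(index=False):
    # row.Pkey, row.Date and row.Level come from the corresponding DataFrame columns
    sql_query = 'call CORE_VALUATIONS.VALUATIONS.INSERTEQCLOSINGPRICE(%d, %s, %s, NULL, NULL)' % (row.Pkey, row.Date, row.Level)
    print(sql_query)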
I'm retrieving live data to use for further processing in a dataframe.
The first part (the get_binance_bars function) gets the historical data, to which a linear regression line is fitted.
Now, I would like the linear regression line to be updated whenever the websocket receives new data. The changing live data is in df['live_price'].
How would you do this?
import websocket, json
import requests
import numpy as np
import pandas as pd
import datetime as dt
from datetime import datetime, date
from sklearn.linear_model import LinearRegression
symbol = "ETHUSDT"
tf = "1m"
now = datetime.now()
today = date.today()
d = int(today.strftime("%d"))
m = int(today.strftime("%m"))
y = int(today.strftime("%Y"))
hr = int(now.strftime("%H"))
mn = int(now.strftime("%M"))
def get_binance_bars(ticker, interval, startTime, endTime):
    url = "https://api.binance.com/api/v3/klines"
    startTime = str(int(startTime.timestamp() * 1000))
    endTime = str(int(endTime.timestamp() * 1000))
    limit = '1000'
    req_params = {"symbol" : ticker, 'interval' : interval, 'startTime' : startTime, 'endTime' : endTime, 'limit' : limit}
    df = pd.DataFrame(json.loads(requests.get(url, params = req_params).text))
    if (len(df.index) == 0):
        return None
    df = df.iloc[:, 0:4]
    df.columns = ['time', 'high', 'low', 'close']
    df.close = df.close.astype("float")
    df.low = df.low.astype("float")
    df.high = df.high.astype("float")
    global Y_pred
    X = df.time.iloc[-20:].values.reshape(-1, 1)
    Y = df.close.iloc[-20:].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    Y_pred = linear_regressor.predict(X)
    df['Y_pred'] = np.nan
    df.iloc[-20:, df.columns.get_loc('Y_pred')] = Y_pred
    df.time = [dt.datetime.fromtimestamp(x / 1000.0) for x in df.time]
    df.drop(df.tail(1).index,inplace=True) #cut last row to prevent double bar with live data
    return df
SOCKET = "wss://stream.binance.com:9443/ws/"+symbol.lower()+"#kline_"+tf
df = get_binance_bars(symbol, tf, dt.datetime(y, m, d, hr-hr, mn), dt.datetime(y, m, d, hr, mn)) #define how many bars, hr-1 = 60 bars
def on_open(ws):
    print('opened connection')

def on_close(ws):
    print('closed connection')

def on_message(ws, message):
    global df
    global time_plot
    global close
    global low
    global high
    json_message = json.loads(message)
    high = float(json_message['k']['h'])
    low = float(json_message['k']['l'])
    close = float(json_message['k']['c'])
    time_plot = dt.datetime.fromtimestamp(json_message['k']['t']/1000).strftime('%H:%M')
    df['live_price'] = close
    df.iloc[-20:, df.columns.get_loc('Y_pred')] = Y_pred #<--- DOESN'T WORK
    print(df)
ws = websocket.WebSocketApp(SOCKET, on_open=on_open, on_close=on_close, on_message=on_message)
ws.run_forever()
I would prefer to use Plotly for this sort of work. Have a look at the Dash Interval component for updating graphs and plots; it will be useful in the longer run for building dashboards.
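A minimal sketch of the Dash + Interval idea (not a drop-in replacement for the websocket code above); get_latest_df() is a hypothetical helper that would re-fetch the bars and refit the regression on every tick:
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go

app = Dash(__name__)
app.layout = html.Div([
    dcc.Graph(id='live-graph'),
    dcc.Interval(id='tick', interval=5000, n_intervals=0),  # fire every 5 seconds
])

@app.callback(Output('live-graph', 'figure'), Input('tick', 'n_intervals'))
def refresh(n_intervals):
    df = get_latest_df()  # hypothetical: call get_binance_bars() again and refit the regression here
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['time'], y=df['close'], name='close'))
    fig.add_trace(go.Scatter(x=df['time'], y=df['Y_pred'], name='regression'))
    return fig

if __name__ == '__main__':
    app.run(debug=True)  # app.run_server(debug=True) on older Dash versions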
I am trying to monitor sensor data in real time, but the plot shows nothing; the code below is just an example.
Can anyone explain to me why the result shows nothing?
import datetime
import random
import matplotlib.pyplot as plt
from drawnow import *
from matplotlib.dates import AutoDateLocator, AutoDateFormatter, date2num
i = 0
x = 0
y = 0
FirstTime = str('00:00')
LastTime = str('00:00')
def CreatePlot():
    figure = plt.subplot()
    plt.plot([],[])
    date_datetime = datetime.datetime.strptime(LastTime, '%H:%M')
    int_date = date2num( date_datetime)
    locator = AutoDateLocator()
    figure.xaxis.set_major_locator(locator)
    figure.xaxis.set_major_formatter( AutoDateFormatter(locator) )
    min_date = date2num( datetime.datetime.strptime(FirstTime, '%H:%M') )
    max_date = date2num( datetime.datetime.strptime(LastTime, '%H:%M') )
    plt.xlim(min_date, max_date)
    plt.plot(x,y, 'r-')
    plt.gcf().autofmt_xdate()
while True:
    x = datetime.datetime.now() + datetime.timedelta(minutes=i)
    x = datetime.datetime.strftime(x,'%H:%M')
    if i == 0:
        FirstTime = x
    else:
        LastTime = x
    y = (2*i)+2
    if i>500:
        break
    else:
        drawnow(CreatePlot)
        plt.pause(0.0001)
    i+=1
I solved the issue, so I will explain it here to help someone else like me.
The first issue is converting the date to a string with strftime:
strings plotted on the x-axis are not auto-formatted as dates, so the values should stay as datetime objects.
Also, the following commands are redundant:
min_date = date2num( datetime.datetime.strptime(FirstTime, '%H:%M') )
max_date = date2num( datetime.datetime.strptime(LastTime, '%H:%M') )
plt.xlim(min_date, max_date)
In addition, for a better view you can also add the following commands:
from matplotlib.ticker import AutoMinorLocator
from matplotlib.dates import AutoDateLocator, AutoDateFormatter
...
ax0 = plt.subplot(2,2,1)
locator = AutoDateLocator()
ax0.xaxis.set_major_locator(locator)
formatter = AutoDateFormatter(locator)
ax0.xaxis.set_major_formatter(formatter)
ax0.xaxis.set_minor_locator(AutoMinorLocator())
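Putting that together, here is a minimal sketch of the corrected idea (my own reconstruction, not the original script): keep the x values as datetime objects instead of strftime strings, and let the locator/formatter produce the labels.
import datetime
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateLocator, AutoDateFormatter

xs, ys = [], []
fig, ax = plt.subplots()
for i in range(50):
    # keep the x values as datetime objects -- no strftime() here
    xs.append(datetime.datetime.now() + datetime.timedelta(minutes=i))
    ys.append(2 * i + 2)
    ax.cla()  # redraw from scratch each iteration, like drawnow does
    locator = AutoDateLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(AutoDateFormatter(locator))
    ax.plot(xs, ys, 'r-')
    fig.autofmt_xdate()
    plt.pause(0.01)
plt.show()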
I have folder names which are date-time formatted, from
2018-08-21 to 2018-10-16
Inside each folder there is a zip file which contains time values on a linear scale; the time goes up linearly.
I'm trying to plot, for each day (each of which has a lot of .bz2 time series data), the time values at that date.
Right now I'm trying this:
timearr = np.asarray(data1['time'])
ax.plot(np.asarray(timeStamps), timearr)
ax.set_title('title')
ax.set_ylabel('date vs time ')
ax.grid(True)
# Format the x-axis for dates (label formatting, rotation)
fig.autofmt_xdate(rotation=45)
fig.tight_layout()
plt.show()
but I get an error message that the two dimensions don't match.
timeStamps is a list of dates:
2018-08-21
2018-08-22
2018-08-23
2018-08-24
2018-08-25
and data1['time'] is a list of epoch values.
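(For context: the error arises because ax.plot(x, y) needs x and y of equal length, and here there are 5 dates but many time values. A minimal sketch of one way to pair them up, assuming each day maps to its own array of epoch seconds — placeholder values below — converted with datetime.fromtimestamp:)
import datetime
import matplotlib.pyplot as plt

# placeholder data: one list of epoch seconds per day
per_day = {
    datetime.date(2018, 8, 21): [1534833000, 1534836600],
    datetime.date(2018, 8, 22): [1534919400, 1534923000],
}

fig, ax = plt.subplots()
for day, epochs in per_day.items():
    times = [datetime.datetime.fromtimestamp(e) for e in epochs]
    ax.plot([day] * len(times), times, '.')  # repeat the date so x and y lengths match
fig.autofmt_xdate(rotation=45)
plt.show()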
Unfortunately, I don't know the detailed file structure, so I have to guess a little where the problem actually is.
Here is some code to generate some folders with generic bz2 files:
import bz2
import numpy as np
import datetime
import os
startDate = datetime.datetime(2000,5,2,10,15,0,0)
for day in range(5):
    theDate = startDate + datetime.timedelta(days=day)
    folder = "{}".format( theDate.replace( microsecond = 0 ).strftime("%Y-%m-%d") )
    os.mkdir( folder )
    data = ""
    for k in range(100):
        zzz = theDate + datetime.timedelta(seconds=137*k)
        data += "{} ".format( zzz.replace( microsecond = 0 ).strftime("%H:%M:%S") )
        d = zzz.day
        m = zzz.minute
        data += " {}\n".format( .17 * d + .003 * m**2 -.001 * m )
    myZip = bz2.BZ2File(os.path.join( folder, 'dat.bz2' ), 'w' )
    myZip.write( data.encode() )  # BZ2File expects bytes on Python 3
    myZip.close()
Those folders and files are then read with:
import bz2
import numpy as np
import datetime
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
"""
SE posts I used
https://stackoverflow.com/questions/1574088/plotting-time-in-python-with-matplotlib
https://stackoverflow.com/questions/11264521/date-ticks-and-rotation-in-matplotlib
"""
def split_data( inData ):
    rows = inData.strip().split('\n')
    rowcol = [x.split() for x in rows ]
    x,y = zip(*rowcol)
    y = [float(z) for z in y ]
    x = [ datetime.datetime.strptime(z, '%H:%M:%S') for z in x]
    return x,y
dataDict = dict()
for root, dirs, files in os.walk("."):
    for name in files:
        if name.split('.')[-1]=='bz2':
            base = os.path.basename( root )
            myPath = (os.path.join(root, name))
            bz = bz2.BZ2File( myPath, 'r' )
            data = bz.read().decode()  # bytes -> str on Python 3
            dataDict[ base ] = split_data( data )
myFmt = mdates.DateFormatter('%H:%M')
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
for key, dt in dataDict.items():  # .iteritems() was Python 2
    ax.plot( *dt , label=key )
ax.xaxis.set_major_formatter(myFmt)
for label in ax.get_xmajorticklabels():
    label.set_rotation(30)
ax.set_ylabel('data (arb. u.)')
ax.set_xlabel('time')
ax.legend( loc=0 )
plt.tight_layout()
plt.show()
Providing:
Hope I got it right.