Add line to pandas plot - python

Using pandas I create a plot of a time series like this:
import numpy as np
import pandas as pd
# Sixty days of random daily data starting 2016-01-01.
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ax = ts.plot()
# Overall mean as a dashed red line. xmin/xmax are fractions of the axes width
# and must lie in [0, 1]; 0..1 spans the whole plot.
ax.axhline(y=ts.mean(), xmin=0, xmax=1, color='r', linestyle='--', lw=2)
I would like to add another horizontal line at the level of the mean using only data from February. The mean is just ts.loc['2016-02'].mean(), but how do I add a horizontal line at that level that doesn't go across the whole figure, but only for the dates in February?

Or you can create a new time series whose values are the mean and index only spans February.
# Constant series equal to the February mean, indexed by the February dates only.
feb = ts['2016-02']
ts_feb_mean = feb * 0 + feb.mean()
All together it looks like:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
# Feb mean: a constant series over the February dates only.
ts_fm = ts['2016-02'] * 0 + ts['2016-02'].mean()
# Re-align to the full index; dates outside February become NaN, so the
# plotted line only appears over February.
ts_fm = ts_fm.reindex_like(ts)
# Total mean: a constant series over the whole range.
ts_mn = ts * 0 + ts.mean()
# Create the axes explicitly so all three series share one plot.
fig, ax = plt.subplots(1, 1)
ts.plot(ax=ax)
ts_mn.plot(ax=ax)
ts_fm.plot(ax=ax)

You can use xmin and xmax to control where in the chart the line starts and ends. But these are fractions of the axes width (0 = far left, 1 = far right), not data coordinates.
import numpy as np
import pandas as pd
np.random.seed([3, 1415])
rng = pd.date_range('2016-01-01', periods=60, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts_feb = ts['2016-02']
# Share of the series that February covers. February is the tail of the range,
# so its line starts at (1 - ratio) of the axes width and runs to the right edge.
ts_len = float(len(ts))
ts_len_feb = float(len(ts_feb))
ratio = ts_len_feb / ts_len
ax = ts.plot()
# Draw both lines at the actual means (no scaling factor), as the question asks.
ax.axhline(y=ts.mean(), xmin=0, xmax=1, color='r', linestyle='--', lw=2)
# NOTE(review): xmin is approximate; it assumes the x-limits coincide with the
# first and last dates of ts.
ax.axhline(y=ts_feb.mean(), xmin=(1. - ratio), xmax=1, color='g', linestyle=':', lw=2)

Related

Panel is overlapping and has a wrong ratio in mplfinance plot

I'm trying to plot a subplot but there are two problems.
#1 The panel_ratio setting (6,1) is unnoticed.
#2 The y axis of the top panel juts down and overlaps the y axis of the bottom panel, so that the bars are trimmed in the top panel
What is wrong with the code?
import pandas as pd
import numpy as np
from matplotlib.animation import FuncAnimation
import mplfinance as mpf
# 50 timestamps starting 2022-01-01; used below as the index of the sample frame.
# NOTE(review): freq='ms' is presumably milliseconds ('MS' would be month start) — confirm intent.
times = pd.date_range(start='2022-01-01', periods=50, freq='ms')
def get_rsi(df, rsi_period):
    """Return the relative strength index (RSI) of ``df['close']``.

    Args:
        df: DataFrame with a numeric ``'close'`` column.
        rsi_period: Smoothing period; used both as the exponential window
            (``com=rsi_period - 1``) and as ``min_periods``.

    Returns:
        Series aligned with ``df``; leading entries are NaN until
        ``rsi_period`` price changes are available.
    """
    chg = df['close'].diff(1)
    # Split each change into a gain (losses zeroed) and a loss (gains zeroed).
    gain = chg.mask(chg < 0, 0)
    loss = chg.mask(chg > 0, 0)
    avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    # avg_loss is <= 0, so take the magnitude of the ratio.
    rs = abs(avg_gain / avg_loss)
    rsi = 100 - (100 / (1 + rs))
    return rsi
# Synthetic price data: random integer opens, with high/low/close derived from open.
df = pd.DataFrame(np.random.randint(3000, 3100, (50, 1)), columns=['open'])
df['high'] = df.open+5
df['low'] = df.open-2
df['close'] = df.open
df['rsi14'] = get_rsi(df, 14)
df.set_index(times, inplace=True)
# Index labels of the five smallest lows (not used again in this snippet).
lows_peaks = df.low.nsmallest(5).index
# The figure and both axes are created by hand and passed to mplfinance via ax=.
fig = mpf.figure(style="charles",figsize=(7,8))
# NOTE(review): (1,1,1) spans the whole figure while (2,1,2) is the bottom half of a
# two-row grid, so ax1 and ax2 overlap — likely the cause of problem #2; confirm.
ax1 = fig.add_subplot(1,1,1)
ax2 = fig.add_subplot(2,1,2)
# NOTE(review): panel= and panel_ratios= are presumably ignored when ax= is supplied
# (mplfinance external-axes mode) — confirm against the mplfinance documentation.
ap0 = [ mpf.make_addplot(df['rsi14'],color='g', ax=ax2, ylim=(10,90), panel=1) ]
mpf.plot(df, ax=ax1, ylim=(2999,3104), addplot=ap0, panel_ratios=(6,1))
mpf.show()
In this case, it is easier to use a panel instead of an external axis. I tried your code and could not improve it. For a detailed reference on panels, see here.
# fig = mpf.figure(style="charles", figsize=(7,8))
# ax1 = fig.add_subplot(1,1,1)
# ax2 = fig.add_subplot(2,1,2)
# Let mplfinance build the panels itself: prices on panel 0, RSI on panel 1.
ohlc = df[['open', 'high', 'low', 'close']]
ap0 = mpf.make_addplot(df[['rsi14']], color='g', ylim=(10, 90), panel=1)
mpf.plot(
    ohlc,
    addplot=ap0,
    ylim=(2999, 3104),
    panel_ratios=(6, 1),
    style='charles',
)
mpf.show()

matplotlib date formatter or date locator is not showing the first date index and starting from the next one

I want my x-ticks to show Mondays only, formatted as Month-Day. I tried the solution here and I get the correct tick format; however, there is something wrong with the locator and the first date is not shown correctly. The first tick should be at Feb 03 based on my indexing.
The code to reproduce my results is below:
import datetime as dt

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

# NOTE(review): START_DATE is not defined in this snippet and must be set before
# this point; the surrounding text implies a Monday (Feb 03) — confirm.
width, height = plt.figaspect(.30)
fig, ax = plt.subplots(1, 1, figsize=(width, height), dpi=300, constrained_layout=False)
day_pal = sns.color_palette("pastel", 7)

# Twelve weeks of consecutive days, each tagged with its weekday (Monday == 0).
date_df = pd.DataFrame()
date_df['ts'] = pd.Series(pd.date_range(START_DATE, periods=12*7, freq="D"))
date_df['weekday'] = date_df['ts'].dt.weekday
# x has to exist before it is used to look up the last timestamp.
x = list(date_df.ts)
print(date_df.ts[0])
print(date_df.ts[len(x)-1])

daily_totals = np.arange(len(x)) + np.random.randint(0, 10, len(x))
ax.plot(x, daily_totals, lw=3, color='black', alpha=0.5)
ax.axvline(x[42], color="red", lw=5, linestyle="--", alpha=0.6)

# Shade every weekday (Mon-Fri) with its own pastel colour, one day wide.
for wkdy in range(0, 5):
    start = np.array(date_df[date_df.weekday == wkdy]['ts'])
    end = start + pd.Timedelta(days=1)
    for span_start, span_end in zip(start, end):
        ax.axvspan(span_start, span_end, alpha=0.5, color=day_pal[wkdy])

# Shade each weekend as one grey block starting on Saturday and two days wide.
start = np.array(date_df[date_df.weekday == 5]['ts'])
end = start + pd.Timedelta(days=2)
for span_start, span_end in zip(start, end):
    ax.axvspan(span_start, span_end, alpha=0.5, color="gray")

ax.set_ylabel("Number of Trips")
ax.set_xlabel("Date")
ax.set_xlim(x[0], x[-1])
# DayLocator(interval=7) is not anchored to a weekday, so its first tick need
# not fall on the first Monday; WeekdayLocator pins the ticks to Mondays.
ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.MO))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b-%d"))
ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b-%d"))
plt.xticks(rotation=65)
plt.show()
Resulting Plot:

how to make graph

I want to make a graph using matplotlib in Python.
# Load the array stored in name.npy (the opening quote was missing).
np.load('name.npy')
I searched a lot and tried several things,
for example, just:
# Integer literals with leading zeros (05, 01) are a syntax error in Python 3.
x = [dt.datetime(2003, 5, 1), dt.datetime(2008, 6, 1)]
df = np.load(r'file')
y = df
Set the date range to the one you want for your graph (the code below uses a start date and one point per day); 'y' should be the loaded array.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the y-values, then give each one a consecutive daily timestamp.
y = np.load(r'c:\python27\abnormal.npy')
start_date = '2003-05-01'
n_points = len(y)
x = pd.date_range(start=start_date, periods=n_points, freq='D')
# '.' draws unconnected point markers.
plt.plot(x, y, '.')
plt.show()
If your input array does not contain ordered pairs such as `[(0,1), (1,1), (2,2)]` and only contains one set of numbers such as `[1, 2, 3, 4]`, you need to create a set of x-coordinates. For a time series in days, you could try something like this:
import datetime
import numpy as np
import matplotlib.pyplot as plt
def getData(fileName, beginDate=datetime.date(2015, 6, 1)):
    """Load y-values from a ``.npy`` file and pair them with daily dates.

    Args:
        fileName: Path passed to ``np.load``.
        beginDate: Date assigned to the first value; each following value
            is one day later. Defaults to 2015-06-01.

    Returns:
        Tuple ``(x, y)`` where ``y`` is the loaded array and ``x`` is a list
        of ``datetime.date`` objects of the same length.
    """
    # Load the data file to serve as y-axis coordinates.
    y = np.load(fileName)
    # One x coordinate per y value: beginDate plus the number of days passed.
    x = [beginDate + datetime.timedelta(days=n) for n in range(len(y))]
    return x, y
def plot(x, y):
    """Show a scatter plot of ``y`` against ``x`` with vertical x tick labels.

    Args:
        x: x coordinates (for example the dates returned by ``getData``).
        y: y coordinates, same length as ``x``.
    """
    # Plot the data on a single axes filling a new figure.
    plt.figure()
    ax = plt.subplot2grid((1, 1), (0, 0), rowspan=1, colspan=1)
    ax.scatter(x, y)
    # Rotate the x tick labels to vertical.
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(90)
    ax.grid(True)
    # Extra bottom margin leaves room for the rotated labels.
    plt.subplots_adjust(left=.10, bottom=.19, right=.93, top=.95, wspace=.20, hspace=0)
    plt.show()
# Load the series from 'abnormal.npy' and display it as a scatter plot.
x, y = getData('abnormal.npy')
plot(x, y)

How can I visualize my csv data into cluster

I want to visualize my csv data into cluster.
This is my csv data.(https://github.com/soma11soma11/EnergyDataSimulationChallenge/blob/challenge2/soma11soma/challenge2/analysis/Soma/total_watt.csv)
For your information:
I could visualize the CSV data as a 3D graph.
And this is my code.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
MY_FILE = 'total_watt.csv'
df = pd.read_csv(MY_FILE, parse_dates=[0], header=None, names=['datetime', 'consumption'])
# Split each timestamp into a calendar date and a time of day.
df['date'] = [stamp.date() for stamp in df['datetime']]
df['time'] = [stamp.time() for stamp in df['datetime']]
# Rows: time of day, columns: date, cells: consumption.
pv = df.pivot(index='time', columns='date', values='consumption')
# to avoid holes in the surface
pv = pv.fillna(0.0)
# Integer grid positions for every (time, date) cell of the pivot table.
xx, yy = np.mgrid[0:len(pv), 0:len(pv.columns)]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(xx, yy, pv.values, cmap='jet', cstride=1, rstride=1)
fig.colorbar(surf, shrink=0.5, aspect=10)
dates = [day.strftime('%m-%d') for day in pv.columns]
times = [moment.strftime('%H:%M') for moment in pv.index]
ax.set_title('Energy consumptions Clusters', color='lightseagreen')
ax.set_xlabel('time', color='darkturquoise')
ax.set_ylabel('date(year 2011)', color='darkturquoise')
ax.set_zlabel('energy consumption', color='darkturquoise')
# Label every tenth grid position with its time / date.
ax.set_xticks(xx[::10, 0])
ax.set_xticklabels(times[::10], color='lightseagreen')
ax.set_yticks(yy[0, ::10])
ax.set_yticklabels(dates[::10], color='lightseagreen')
# set_axis_bgcolor was removed from matplotlib; set_facecolor replaces it.
ax.set_facecolor('black')
plt.show()
#Thanks for reading! Looking forward to the Skype Interview.
And this is the graph, I got from this code.
I think I should change some points of this code, in order to cluster the data into three group: high, medium and low energy consumption.
The image I want to get from clustering the data is like this.(2D, 3colours.)
Should I use k-means?
Here is the result using KMeans.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from sklearn.cluster import KMeans
MY_FILE = '/home/Jian/Downloads/total_watt.csv'
df = pd.read_csv(MY_FILE, parse_dates=[0], header=None, names=['datetime', 'consumption'])
df['date'] = [stamp.date() for stamp in df['datetime']]
df['time'] = [stamp.time() for stamp in df['datetime']]
# One value per (time, date) pair; missing readings are filled with 0.
stacked = df.pivot(index='time', columns='date', values='consumption').fillna(0).stack()
# do unsupervised clustering
# =============================================
estimator = KMeans(n_clusters=3, random_state=0)
# KMeans expects a 2-D array: one row per sample, a single feature column.
X = stacked.values.reshape(len(stacked), 1)
cluster = estimator.fit_predict(X)
# check the mean value of each cluster
X[cluster==0].mean() # Out[53]: 324.73175293698534
X[cluster==1].mean() # Out[54]: 6320.8504071851467
X[cluster==2].mean() # Out[55]: 1831.1473140192766
# plotting
# =============================================
fig, ax = plt.subplots(figsize=(10, 8))
# MultiIndex.labels was removed from pandas; .codes holds the same integer
# positions of each row within the 'time' and 'date' levels.
x = stacked.index.codes[0]
y = stacked.index.codes[1]
# One scatter per cluster, keeping the original colour assignment.
for cluster_id, colour in zip((0, 1, 2), ('g', 'r', 'b')):
    members = cluster == cluster_id
    ax.scatter(x[members], y[members], label='mean: {}'.format(X[members].mean()), c=colour, alpha=0.8)
ax.legend(loc='best')

Graph Plot axes scaling / design / time format

My current Pandas / python plot looks like this:
What I like to have:
I want to get rid of the 1e7 and 1e9 on both y-axes. The values of the two time series are in the millions and billions, so a delimiter for the number would be a plus for readability.
I like to have a (light) grid in the background and at least normal lines on the axes.
I like to have a monthly scaling, not every 6 months on the x-axis
How can I add the legend below?
The current code is (transactions 1 and 2 are time series of trading volumes):
# Plot transactions1 on the primary y-axis in a 12 x 3.5 figure.
ax = data.transactions1.plot(figsize=(12, 3.5))
# Plot transactions2 against a secondary y-axis (secondary_y=True).
# NOTE(review): presumably `data` is a DataFrame with these two columns — confirm.
data.transactions2.plot(secondary_y=True)
The following code :
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import datetime
from matplotlib.ticker import ScalarFormatter
# Sample data: one value per day for the last year, on two very different scales.
base = datetime.datetime.today()
numdays = 365
date_list = [base - datetime.timedelta(days=x) for x in range(0, numdays)]
x = np.arange(0, numdays, 1)
values1 = 0.05 * x**2*1e9
values2 = -1*values1*1e7
fig, ax1 = plt.subplots()
# Second y-axis sharing the same x-axis.
ax2 = ax1.twinx()
lns1 = ax1.plot(date_list, values1, 'g-', label='Foo')
lns2 = ax2.plot(date_list, values2, 'b-', label='Bar')
# Set the date format and apply it to the x-axis.
date_fmt = mdates.DateFormatter('%b %Y')
ax1.xaxis.set_major_formatter(date_fmt)
ax1.set_xlabel('Dates')
# Slant the date labels so they do not overlap.
fig.autofmt_xdate()
# Display the grid.
ax1.grid(True)
# To get rid of the 1eX offset, show each tick divided by the scale factor.
# A formatter (rather than fixed tick labels) stays correct when the axis is
# rescaled or zoomed.
ax1.yaxis.set_major_formatter(
    ticker.FuncFormatter(lambda value, pos: '{:g}'.format(value / 1e12)))
ax1.set_ylabel('x 1e12')
ax2.yaxis.set_major_formatter(
    ticker.FuncFormatter(lambda value, pos: '{:g}'.format(value / 1e19)))
ax2.set_ylabel('x 1e19')
# Merge the lines of both axes into one legend placed below the plot.
lns = lns1 + lns2
labs = [line.get_label() for line in lns]
ax1.legend(lns, labs, bbox_to_anchor=(0., -0.25, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.show()
gives you :

Categories

Resources