i want to make graph using matplotlib in python.
np.load(name.npy')
i searched many things and i tried
for example..just...
x = [dt.datetime(2003, 05, 01), dt.datetime(2008, 06, 01)]
df = np.load(r'file')
y = df
Replace the end date on the date-range to your desired graph, and the 'y' should be array loaded
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
start_date = '2003-05-01'
y = np.load(r'c:\python27\abnormal.npy')
x = pd.date_range(start=start_date, periods=len(y), freq='D')
plt.plot(x,y,'.')
plt.show()
If your input array does not contain ordered pairs such as [(0,1), (1,1), (2,2)] and only contains one set of numbers '[1, 2, 3, 4]`, you neeed to create a set of x-coordinates. For a time series in days, you could try something like this:
import datetime
import numpy as np
import matplotlib.pyplot as plt
def getData(fileName):
# Load the data file to serve as y-axis coordinates
y = np.load(fileName)
# For each y coordinate we need an x coordinate
time_offset = list(range(len(y)))
# Convert time_offset to a time-series
# We will assume x-values equal number of days since a beginDate
x = []
beginDate = datetime.date(2015, 6, 1) # The date to begin our time series
for n in time_offset:
date = beginDate + datetime.timedelta(n) # Date + number_of_Days_passed
x.append(date)
return x, y
def plot(x, y):
# Plot the data
fig = plt.figure()
ax = plt.subplot2grid((1,1), (0,0), rowspan=1, colspan=1)
ax.scatter(x, y)
for label in ax.xaxis.get_ticklabels():
label.set_rotation(90)
ax.grid(True)
plt.subplots_adjust(left=.10, bottom=.19, right=.93, top=.95, wspace=.20, hspace=0)
plt.show()
x, y = getData('abnormal.npy')
plot(x, y)
Related
I want to code a Pyplot graph where it changes colors below the negative mark. I have conditions stated at the Color Conditions section. how would i be able to get this to work?
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
df = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
CumSum_val = np.cumsum(df['col1'])
datetime = pd.to_datetime(df['date'])
#Color conditions
green_cond = np.where(CumSum_val > 0)
red_cond = np.where(CumSum_val <= 0)
# Define the date format
date_form = DateFormatter("%m-%d")
plt.xaxis.set_major_formatter(date_form)
plt.plot(datetime[green_cond],CumSum_val[green_cond], color = 'g')
plt.plot(datetime[red_cond],CumSum_val[red_cond], color = 'r')
plt.show()
You can try plot three lines with different color, the latter has prior priority to show color.
green_cond = CumSum_val > 0
red_cond = CumSum_val <= 0
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(datetime, CumSum_val, color='b')
ax.plot(datetime[green_cond], CumSum_val[green_cond], color='g')
ax.plot(datetime[red_cond], CumSum_val[red_cond], color='r')
# Define the date format
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)
plt.show()
The data you provided doesn't cross zero, so I took the liberty to modify this value to a given threshold, 15 in the below example. You can change it to whatever value you like.
The idea is to insert a new point every time the line crosses the threshold. Linear interpolation was used to achieve it. Then, to properly plot the two lines you need to select the points with:
green_cond = CumSum_val >= threshold
red_cond = CumSum_val <= threshold
Note that I used >= and <= because I want the lines to share the common threshold point.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter, date2num, num2date
df = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
def modify_coords(x, y, y_lim):
"""If a line segment defined by `(x1, y1) -> (x2, y2)` intercepts
a limiting y-value, divide this segment by inserting a new point
such that y_newpoint = y_lim.
"""
xv, yv = [x[0]], [y[0]]
for i in range(len(x) - 1):
xc, xn = x[i:i+2]
yc, yn = y[i:i+2]
if ((yc < y_lim) and (yn > y_lim)) or ((yc > y_lim) and (yn < y_lim)):
xv.append(((y_lim - yc) / ((yn - yc) / (xn - xc))) + xc)
yv.append(y_lim)
xv.append(xn)
yv.append(yn)
return np.array(xv), np.array(yv)
CumSum_val = np.cumsum(df['col1'])
datetime = pd.to_datetime(df['date'])
datenum = date2num(datetime)
threshold = 15
datenum, CumSum_val = modify_coords(datenum, CumSum_val, threshold)
datetime = np.array(num2date(datenum))
#Color conditions
green_cond = CumSum_val >= threshold
red_cond = CumSum_val <= threshold
fig, ax = plt.subplots()
# Define the date format
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)
ax.plot(datetime[green_cond], CumSum_val[green_cond], color = 'g')
ax.plot(datetime[red_cond], CumSum_val[red_cond], color = 'r')
plt.show()
The person who made this had used dates in the second graph. I was wondering how would dates be used with the scipy.signal.argrelextrema function.
With this code it doesn't do anything it prints out an empty array for peak_x and peak_y:
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
Here's the link for the original code:
https://openwritings.net/pg/python/python-find-peaks-and-valleys-chart-using-scipysignalargrelextrema
import matplotlib
matplotlib.use('Agg') # Bypass the need to install Tkinter GUI framework
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
# Generate random data.
data_x = np.arange(start = 0, stop = 25, step = 1, dtype='int')
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25) #edited part
# Find peaks(max).
peak_indexes = signal.argrelextrema(data_y, np.greater)
peak_indexes = peak_indexes[0]
# Find valleys(min).
valley_indexes = signal.argrelextrema(data_y, np.less)
valley_indexes = valley_indexes[0]
# Plot main graph.
(fig, ax) = plt.subplots()
ax.plot(data_x, data_y)
# Plot peaks.
peak_x = peak_indexes
peak_y = data_y[peak_indexes]
ax.plot(peak_x, peak_y, marker='o', linestyle='dashed', color='green', label="Peaks")
print(peak_x,peak_y)
# Plot valleys.
valley_x = valley_indexes
valley_y = data_y[valley_indexes]
ax.plot(valley_x, valley_y, marker='o', linestyle='dashed', color='red', label="Valleys")
# Save graph to file.
plt.title('Find peaks and valleys using argrelextrema()')
plt.legend(loc='best')
plt.savefig('argrelextrema.png')
Here's the example how it would work:
You're going to want to use the xticks method. See below:
import matplotlib.pyplot as plt
names = [str(i) for i in range(20)]
x_data = [x for x in range(20)]
y_data = [x for x in range(20)]
plt.plot(x_data, y_data)
plt.xticks(x_data, label=names)
plt.show()
What this does is use an integer between 1-19 cast as a string as the label for the axis X.
Except in your case you want to swap out the names for datatime objects cast to strings. For the xticks, the x_data element prescribes where the ticks will be. You may use any interval of points so long as they are within the bounds of the xdata.
In your case, replace:
data_y = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
with
data_y_ticks = np.array('2015-07-04', dtype=np.datetime64) + np.arange(25)
data_y = [i for i, _ in enumerate(data_y_ticks.tolist())]
then plot as follows:
plt.plot(data_y, x_data)
plt.xticks(data_y, label=data_y_ticks)
plt.show()
Just a heads-up, your X and Y axis names are flipped in your code. I did not correct this in my example, however did interchange their locations in the plot to make the plot make sense.
I'm trying to fill the area under the curve where the y-value is 1. The x-axis is a datetime array with non-regular values. As you can see the fill also includes areas where there is no x-data. Is there a way to tell fill_between to only fill "between" valid data? i.e. in the plot below I'd like the areas between "missing" samples to be white
tx = array(datetimes) # Array of irregular datetimes
ty = array([ones and zeros]) # Array of ones and zeros same size as tx
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(tx, ty, 'r.')
ax.fill_between(tx, 0, 1, where(ty==1))
This might be what you're aiming for.
If it is then you can use rectangular patches. In this code:
y is the list of values meant to correspond to your 'irregular' pattern.
delta_float measures the horizontal distance in the graph corresponding to delta, the distance between ticks.
Notice that the patches are positioned and sized based on dates and delta_float units respectively.
import datetime
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange, date2num
from numpy import arange
date1 = datetime.datetime(2000, 3, 2)
date2 = datetime.datetime(2000, 3, 6)
delta = datetime.timedelta(hours=6)
dates = drange(date1, date2, delta)
delta_float = (dates[-1]-dates[0])/len(dates)
y = [1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,0]
fig, ax = plt.subplots()
ax.plot_date(dates, y, 'r.')
ax.add_patch(patches.Rectangle((dates[0], 0), delta_float, 1, color='grey'))
ax.add_patch(patches.Rectangle((dates[4], 0), 2*delta_float, 1, color='grey'))
ax.add_patch(patches.Rectangle((dates[8], 0), delta_float, 1, color='grey'))
ax.add_patch(patches.Rectangle((dates[12], 0), delta_float, 1, color='grey'))
ax.xaxis.set_major_locator(DayLocator())
ax.xaxis.set_minor_locator(HourLocator(arange(0, 25, 6)))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
ax.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
fig.autofmt_xdate()
plt.show()
I wanted to perform arithmetic operations on dates so i converted these dates
idx_1 = 2017-06-07 00:00:00
idx_2 = 2017-07-27 00:00:00
to floats using,
x1 = time.mktime(idx_1.timetuple()) # returns float of dates
>>> 1496773800.0
x2 = time.mktime(idx_2.timetuple())
>>> 1501093800.0
y1 = 155.98
y2 = 147.07
Am using the following code to plot:
import datetime as dt
import time
import numpy as np
import matplotlib.pyplot as plt
x = [x1, x2]
y = [y1, y2]
Difference = x2 - x1 #this helps to end the plotted line at specific point
coefficients = np.polyfit(x, y, 1)
polynomial = np.poly1d(coefficients)
# the np.linspace lets you set number of data points, line length.
x_axis = np.linspace(x1, x2 + Difference, 3) # linspace(start, end, num)
y_axis = polynomial(x_axis)
plt.plot(x_axis, y_axis)
plt.plot(x[0], y[0], 'go')
plt.plot(x[1], y[1], 'go')
plt.show()
Which plots:
How to make matplotlib to plot the actual dates on x axis instead of floats?
Any kind of Help is Greatly Appreciated.
Starting with datetime objects you may use matplotlib's date2num and num2date functions to convert to and from numerical values. The advantage is that the numerical data is then understood by matplotlib.dates locators and formatters.
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates
idx_1 = datetime.datetime(2017,06,07,0,0,0)
idx_2 = datetime.datetime(2017,07,27,0,0,0)
idx = [idx_1, idx_2]
y1 = 155.98
y2 = 147.07
x = matplotlib.dates.date2num(idx)
y = [y1, y2]
Difference = x[1] - x[0] #this helps to end the plotted line at specific point
coefficients = np.polyfit(x, y, 1)
polynomial = np.poly1d(coefficients)
# the np.linspace lets you set number of data points, line length.
x_axis = np.linspace(x[0], x[1] + Difference, 3) # linspace(start, end, num)
y_axis = polynomial(x_axis)
plt.plot(x_axis, y_axis)
plt.plot(x[0], y[0], 'go')
plt.plot(x[1], y[1], 'go')
loc= matplotlib.dates.AutoDateLocator()
plt.gca().xaxis.set_major_locator(loc)
plt.gca().xaxis.set_major_formatter(matplotlib.dates.AutoDateFormatter(loc))
plt.gcf().autofmt_xdate()
plt.show()
My current Pandas / python plot looks like this:
What I like to have:
I want to get rid of the 1e7 and 1e9 on both y-axes. The values of the two time series are in the millions and billions, so a delimiter for the number would be a plus for readability.
I like to have a (light) grid in the background and at least normal lines on the axes.
I like to have a monthly scaling, not every 6 months on the x-axis
How can I add the legend below?
The current code is (transactions 1 and 2 are time series of trading volumes):
ax = data.transactions1.plot(figsize=(12, 3.5))
data.transactions2.plot(secondary_y=True)
The following code :
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import datetime
from matplotlib.ticker import ScalarFormatter
base = datetime.datetime.today()
numdays = 365
date_list = [base - datetime.timedelta(days=x) for x in range(0, numdays)]
x = np.arange(0, numdays, 1)
values1 = 0.05 * x**2*1e9
values2 = -1*values1*1e7
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
lns1 = ax1.plot(date_list, values1, 'g-', label='Foo')
lns2 = ax2.plot(date_list, values2, 'b-', label='Bar')
# We set the date format
dareFmt = mdates.DateFormatter('%b %Y')
# We then apply the format
ax1.xaxis.set_major_formatter(dareFmt)
ax1.set_xlabel('Dates')
#used to give the inclination
fig.autofmt_xdate()
# Dsiplay the grid
ax1.grid(True)
# To get rid of the 1eX on top i divide the values of the y axis by the exponent value
y_values = ax1.get_yticks().tolist()
y_values = [x / 1e12 for x in y_values]
ax1.set_yticklabels(y_values)
ax1.set_ylabel('10e12')
y_values = ax2.get_yticks().tolist()
y_values = [x / 1e19 for x in y_values]
ax2.set_yticklabels(y_values)
ax2.set_ylabel('10e19')
lns = lns1 + lns2
labs = [l.get_label() for l in lns]
ax1.legend(lns, labs,bbox_to_anchor=(0., -0.25, 1., .102), loc=3,
ncol=2, mode="expand", borderaxespad=0.)
plt.show()
gives you :