I want to plot a line with Pyplot that changes color where the values drop below zero. I have stated my conditions in the "Color conditions" section of the code. How can I get this to work?
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
# Sample data: a numeric column, a constant label column and timestamp strings.
df = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
# Running total of 'col1', and the date strings parsed by pandas.
CumSum_val = np.cumsum(df['col1'])
datetime = pd.to_datetime(df['date'])
#Color conditions
# Positions where the running total is positive (green) or not (red).
green_cond = np.where(CumSum_val > 0)
red_cond = np.where(CumSum_val <= 0)
# Define the date format
date_form = DateFormatter("%m-%d")
# NOTE(review): the formatter is set on the pyplot module itself rather than on
# an Axes object -- presumably one of the failing lines; confirm.
plt.xaxis.set_major_formatter(date_form)
# Draw the green and the red subsets as two separate lines, then display.
plt.plot(datetime[green_cond],CumSum_val[green_cond], color = 'g')
plt.plot(datetime[red_cond],CumSum_val[red_cond], color = 'r')
plt.show()
You can try plotting three lines in different colors; a line plotted later is drawn on top of the earlier ones, so its color takes priority.
# Boolean masks: strictly positive running totals vs. everything else.
green_cond = CumSum_val > 0
red_cond = CumSum_val <= 0

fig, ax = plt.subplots()

# Blue base line over all points first; the green and red subsets are drawn
# afterwards so that they sit on top of it.
ax.plot(datetime, CumSum_val, color='b')
for mask, colour in ((green_cond, 'g'), (red_cond, 'r')):
    ax.plot(datetime[mask], CumSum_val[mask], color=colour)

# Show the x ticks as month-day.
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)
plt.show()
The data you provided doesn't cross zero, so I took the liberty of using a different threshold instead: 15 in the example below. You can change it to whatever value you like.
The idea is to insert a new point every time the line crosses the threshold. Linear interpolation was used to achieve it. Then, to properly plot the two lines you need to select the points with:
green_cond = CumSum_val >= threshold
red_cond = CumSum_val <= threshold
Note that I used >= and <= because I want the lines to share the common threshold point.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter, date2num, num2date
# Sample data: a numeric column, a constant label column and timestamp strings.
df = pd.DataFrame({'col1': [4, 5, 2, 2, 3, 5, 1, 1, 6],
'label':['Old','Old','Old','Old','Old','Old','Old','Old','Old'],
'date': ['2022-01-24 10:07:02', '2022-01-27 01:55:03', '2022-01-30 19:09:03', '2022-02-02 14:34:06',
'2022-02-08 12:37:03', '2022-02-10 03:07:02', '2022-02-10 14:02:03', '2022-02-11 00:32:25',
'2022-02-12 21:42:03']})
def modify_coords(x, y, y_lim):
    """Insert a point on every segment that strictly crosses ``y_lim``.

    For each consecutive pair ``(x1, y1) -> (x2, y2)`` whose y-values lie
    strictly on opposite sides of ``y_lim``, a new point with
    ``y == y_lim`` is inserted between them; its x-value is found by
    linear interpolation. Segments that merely touch ``y_lim`` at an end
    point are left untouched.

    Args:
        x: Sequence of numeric x-coordinates.
        y: Sequence of numeric y-coordinates, same length as ``x``.
        y_lim: The limiting y-value.

    Returns:
        A tuple ``(xv, yv)`` of NumPy arrays holding the original points
        plus the inserted crossing points, in order.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """
    # Work on plain arrays so that indexing is positional even when a pandas
    # Series with a non-default index is passed in.
    x = np.asarray(x)
    y = np.asarray(y)
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")
    if len(x) == 0:
        return np.array([]), np.array([])

    xv, yv = [x[0]], [y[0]]
    for xc, xn, yc, yn in zip(x[:-1], x[1:], y[:-1], y[1:]):
        if (yc < y_lim < yn) or (yc > y_lim > yn):
            # yn != yc is guaranteed by the strict crossing test, so dividing
            # by (yn - yc) is safe even for a vertical segment (xn == xc).
            xv.append(xc + (y_lim - yc) * (xn - xc) / (yn - yc))
            yv.append(y_lim)
        xv.append(xn)
        yv.append(yn)
    return np.array(xv), np.array(yv)
threshold = 15

# Running total of 'col1' and the matching timestamps as matplotlib date numbers.
CumSum_val = np.cumsum(df['col1'])
datenum = date2num(pd.to_datetime(df['date']))

# Add a point wherever the line crosses the threshold, then turn the
# (now longer) array of date numbers back into datetimes.
datenum, CumSum_val = modify_coords(datenum, CumSum_val, threshold)
datetime = np.array(num2date(datenum))

#Color conditions
# Both masks include the threshold itself so the two lines share that point.
green_cond = CumSum_val >= threshold
red_cond = CumSum_val <= threshold

fig, ax = plt.subplots()

# Define the date format
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)

for mask, colour in ((green_cond, 'g'), (red_cond, 'r')):
    ax.plot(datetime[mask], CumSum_val[mask], color=colour)
plt.show()
Related
I'm using matplotlib to create a density plot and a box plot, but when I run my code I get one graph with the two plots overlapping each other. How can I restructure my code to output two separate, individual graphs?
link to graph image:
https://ibb.co/6bCK9MZ
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
def make_t_distribution(sample_size, mean, sd):
    """Fit a t-distribution to a random t-distributed sample.

    Args:
        sample_size: Number of values to draw; ``sample_size - 1`` is used
            as the degrees of freedom.
        mean: Location passed to the random draw.
        sd: Scale passed to the random draw.

    Returns:
        A tuple ``(t_dist, x_axis)``: the frozen ``scipy.stats.t``
        distribution located and scaled by the sample's mean and standard
        deviation, and 500 evenly spaced x-values between its 0.0001 and
        0.9999 quantiles.
    """
    # This snippet's imports do not bring `stats` into scope, so import it here.
    from scipy import stats

    dof = sample_size - 1
    t_sample = stats.t.rvs(dof, mean, sd, sample_size)  # Random t-distribution sample
    sample_mean = np.mean(t_sample)  # sample mean
    sample_std = np.std(t_sample)  # standard deviation of the sample (NumPy default ddof)
    # make a t-distribution based on the sample
    t_dist = stats.t(df=dof, loc=sample_mean, scale=sample_std)
    # Generate an x-axis based on t-quantile values
    x_axis = np.linspace(t_dist.ppf(0.0001), t_dist.ppf(0.9999), 500)
    return t_dist, x_axis
def make_prob_plot():
    """Draw the t-distribution densities and the box plots on one axes.

    Everything is drawn on the single axes created at the top of the
    function, so the density curves and the box plots share one graph.

    Returns:
        The ``matplotlib.pyplot`` module.
    """
    ax = plt.axes()

    # One fitted distribution per sample size, created in this order.
    fitted = [make_t_distribution(size, 0, 2)
              for size in (10, 100, 1000, 10000, 500)]

    # density plot
    plt.xlim(-7.5, 7.5)
    line_styles = ('-', ':', '--', '-.', '.')
    captions = ("$df=9$", "$df=99$", "$df=999$", "$df=9999$", "$Normal$")
    for (dist, grid), line_style, caption in zip(fitted, line_styles, captions):
        ax.plot(grid, dist.pdf(grid), line_style, label=caption)
    plt.xlabel("Value")
    plt.ylabel("Density")
    plt.title("PDF Distribution Comparison $N(\mu=0$, $\sigma=2$)")
    plt.legend()

    # boxplot
    distributions = tuple(np.random.normal(0, 2, size)
                          for size in (10, 100, 1000, 10000))
    plt.boxplot(distributions, labels=("df=9", "df=99", "df=999", "df=9999"))
    plt.boxplot(distributions, widths=.7)
    green_diamond = dict(markerfacecolor='g', marker='D')
    plt.boxplot(distributions, flierprops=green_diamond)
    return plt
make_prob_plot()
The cleanest way is to draw each plot on its own subplot of one figure, using plt.subplots.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy.stats as stats
def make_t_distribution(sample_size, mean, sd):
    """Return a t-distribution fitted to a random sample, plus an x grid.

    A sample of ``sample_size`` values is drawn from a t-distribution with
    ``sample_size - 1`` degrees of freedom, location ``mean`` and scale
    ``sd``. A frozen t-distribution is then built from the sample's mean
    and standard deviation, and 500 evenly spaced x-values between its
    0.0001 and 0.9999 quantiles are generated.
    """
    dof = sample_size - 1

    # Random t-distribution sample and its summary statistics
    draws = stats.t.rvs(df=dof, loc=mean, scale=sd, size=sample_size)
    centre = draws.mean()
    spread = draws.std()

    # Distribution based on the sample, and an x-axis from its quantiles
    t_dist = stats.t(df=dof, loc=centre, scale=spread)
    lower, upper = t_dist.ppf(0.0001), t_dist.ppf(0.9999)
    return t_dist, np.linspace(lower, upper, 500)
def make_prob_plot():
    """Draw the density comparison and the box plots on two stacked subplots.

    The upper subplot shows the PDFs of five fitted t-distributions; the
    lower subplot shows box plots of four normal samples of increasing
    size.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can go on to call
        e.g. ``show()`` on it.
    """
    figure, axis = plt.subplots(2, 1)
    density_ax, box_ax = axis

    tdist1, x1 = make_t_distribution(10, 0, 2)
    tdist2, x2 = make_t_distribution(100, 0, 2)
    tdist3, x3 = make_t_distribution(1000, 0, 2)
    tdist4, x4 = make_t_distribution(10000, 0, 2)
    tdist5, x5 = make_t_distribution(500, 0, 2)

    # density plot
    # Use the Axes methods (not plt.xlim/xlabel/...) so that limits, labels and
    # title land on the density subplot rather than on whichever axes pyplot
    # treats as current.
    density_ax.set_xlim(-7.5, 7.5)
    density_ax.plot(x1, tdist1.pdf(x1), '-', label="$df=9$")
    density_ax.plot(x2, tdist2.pdf(x2), ':', label="$df=99$")
    density_ax.plot(x3, tdist3.pdf(x3), '--', label="$df=999$")
    density_ax.plot(x4, tdist4.pdf(x4), '-.', label="$df=9999$")
    density_ax.plot(x5, tdist5.pdf(x5), '.', label="$Normal$")
    density_ax.set_xlabel("Value")
    density_ax.set_ylabel("Density")
    # Raw string: the backslashes belong to the math text, not to Python escapes.
    density_ax.set_title(r"PDF Distribution Comparison $N(\mu=0$, $\sigma=2$)")
    density_ax.legend()

    # boxplot
    dist1 = np.random.normal(0, 2, 10)
    dist2 = np.random.normal(0, 2, 100)
    dist3 = np.random.normal(0, 2, 1000)
    dist4 = np.random.normal(0, 2, 10000)
    distributions = (dist1, dist2, dist3, dist4)
    green_diamond = dict(markerfacecolor='g', marker='D')
    # A single call with all options, instead of three calls that each draw a
    # full set of boxes on top of the previous one.
    box_ax.boxplot(distributions,
                   labels=("df=9", "df=99", "df=999", "df=9999"),
                   widths=.7,
                   flierprops=green_diamond)

    # Leave room for the upper subplot's x label above the lower subplot.
    figure.tight_layout()
    return plt
make_prob_plot()
I'm trying to fill between two adjacent intervals:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ten random values; the first six rows are tagged 'r', the last four 'g'.
df = pd.DataFrame({'Value': np.random.randint(10, size=10),
                   'Color': ['r'] * 6 + ['g'] * 4})

fig, ax = plt.subplots(1, 1, figsize=(8, 3))
ax.plot(df.Value)

# Fill under the curve once per colour, using only the rows tagged with it.
for colour in ('r', 'g'):
    tagged = df.Color == colour
    ax.fill_between(df.index[tagged], df.Value[tagged], color=colour)
Which gives me:
However, I would like to color the interval between index 5 and 6 as well (red).
How can I do this?
You not only want to fill where the Color is r, but also where the color changes from red to green. You may formulate this as a condition and use the where argument to fill_between.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ten random values; the first six rows are tagged 'r', the last four 'g'.
df = pd.DataFrame({'Value': np.random.randint(10,size=10),
'Color': ['r','r','r','r','r','r','g','g','g','g']})
fig,ax = plt.subplots(1,1,figsize=(8,3))
ax.plot(df.Value)
# Rows tagged red.
cond = df.Color == 'r'
# np.diff(...) == -1 marks each step from a red row to a non-red row; the
# leading [0] shifts the flag onto the first non-red row. Combining that with
# cond extends every red run by one point, so the red fill also covers the
# interval leading into the next colour.
cond2 = cond ^ np.concatenate(([0],np.diff(cond.astype(int)) == -1))
# Red fill over the extended mask, green fill over the non-red rows.
ax.fill_between(df.index, df.Value, where=cond2, color='r')
ax.fill_between(df.index, df.Value, where=~cond, color='g')
plt.show()
While this works fine in this case, it will fail when single intervals are involved. In that case you cannot use fill_between.
A more general solution for coloring the area below a curve in the interval following each point could be the following. It builds one polygon per run of points that satisfy the condition and draws those polygons as a collection in the respective color.
In this example a couple of test cases are shown:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
# Four colour-tag patterns used as test cases (one tag per data point).
t1 = ['r'] * 6 + ['g'] * 4
t2 = list('rgrrgrggrg')
t3 = list('ggrrgrggrr')
t4 = list('grggrggggr')

# Ten random values plus one colour column per test case.
columns = {'Value': np.random.randint(10, size=10)}
columns.update({'Color1': t1, "Color2": t2, 'Color3': t3, "Color4": t4})
df = pd.DataFrame(columns)
def fill_intervals_post(x, y, color, cond, ax=None):
    """Fill the area under ``y`` for every run of points tagged ``cond``.

    Each run of consecutive positions where ``color == cond`` is extended
    by the point that follows it, turned into a polygon reaching down to
    y = 0, and all polygons are added to the axes as one collection
    coloured ``cond``.

    Args:
        x: Array of x-coordinates.
        y: Array of y-coordinates.
        color: Array of colour tags, one per point.
        cond: The tag to fill for; also used as the fill colour.
        ax: Axes to draw on; the current axes are used when omitted.
    """
    ax = ax or plt.gca()

    matches = color == cond
    steps = np.diff(matches.astype(int))
    positions = np.arange(len(color) - 1)

    # A run starts one past a 0 -> 1 step; it is cut two past a 1 -> 0 step so
    # that the first non-matching point is still part of the run.
    run_starts = positions[steps == 1] + 1
    run_stops = positions[steps == -1] + 2
    cuts = np.concatenate(([0, 0], np.sort(np.concatenate((run_starts, run_stops)))))

    # After splitting, pieces alternate; the offset picks every second piece,
    # starting from 0 or 1 depending on whether the first point matches.
    offset = (~matches).astype(int)[0]
    x_runs = np.split(x, cuts)[offset::2]
    y_runs = np.split(y, cuts)[offset::2]

    # Polygon outline: along the curve, then back along y = 0.
    verts = [
        np.c_[np.concatenate((xi, xi[::-1])),
              np.concatenate((yi, np.zeros_like(yi)))]
        for xi, yi in zip(x_runs, y_runs)
    ]
    ax.add_collection(PolyCollection(verts, color=cond))
# One subplot per colour column; each shows the curve plus red and green fills.
fig, axes = plt.subplots(4, 1, figsize=(8, 9))
for number, ax in enumerate(axes, start=1):
    tags = df["Color{}".format(number)].values
    ax.plot(df.index.values, df.Value.values, color="k")
    ax.set_title(tags)
    for shade in ("r", "g"):
        fill_intervals_post(df.index.values, df.Value.values,
                            df["Color{}".format(number)].values, shade, ax=ax)
fig.tight_layout()
plt.show()
I'm trying to fill the area under the curve where the y-value is 1. The x-axis is a datetime array with non-regular values. As you can see the fill also includes areas where there is no x-data. Is there a way to tell fill_between to only fill "between" valid data? i.e. in the plot below I'd like the areas between "missing" samples to be white
# Pseudo-code: `datetimes` and `[ones and zeros]` stand in for the real data.
tx = array(datetimes) # Array of irregular datetimes
ty = array([ones and zeros]) # Array of ones and zeros same size as tx
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Samples as red dots.
ax.plot(tx, ty, 'r.')
# Fill between y=0 and y=1; the fourth positional argument is where(ty==1).
ax.fill_between(tx, 0, 1, where(ty==1))
This might be what you're aiming for.
If it is then you can use rectangular patches. In this code:
y is the list of values meant to correspond to your 'irregular' pattern.
delta_float measures the horizontal distance in the graph corresponding to delta, the distance between ticks.
Notice that the patches are positioned and sized based on dates and delta_float units respectively.
import datetime
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange, date2num
from numpy import arange
from itertools import groupby

date1 = datetime.datetime(2000, 3, 2)
date2 = datetime.datetime(2000, 3, 6)
delta = datetime.timedelta(hours=6)
dates = drange(date1, date2, delta)

# Horizontal distance between two consecutive samples in axis units. Taken
# from neighbouring samples: dividing the total span by len(dates) would
# divide by the number of points rather than the number of intervals.
delta_float = dates[1] - dates[0]

y = [1,1,0,0,1,1,1,0,1,1,0,0,1,1,0,0]

fig, ax = plt.subplots()
ax.plot_date(dates, y, 'r.')

# Shade every run of consecutive ones from its first to its last sample, so
# the stretches between runs stay blank. The rectangles are derived from `y`
# instead of being listed by hand.
run_start = 0
for value, run in groupby(y):
    run_length = len(list(run))
    if value == 1 and run_length > 1:
        ax.add_patch(patches.Rectangle((dates[run_start], 0),
                                       (run_length - 1) * delta_float, 1,
                                       color='grey'))
    run_start += run_length

# Major ticks per day, minor ticks every six hours.
ax.xaxis.set_major_locator(DayLocator())
ax.xaxis.set_minor_locator(HourLocator(arange(0, 25, 6)))
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))

ax.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
fig.autofmt_xdate()

plt.show()
I want to make a graph using matplotlib in Python, from data loaded with:
np.load('name.npy')
I searched a lot and tried many things, for example just this:
# Start and end dates. Month and day are written without leading zeros:
# zero-prefixed integer literals such as 05 are a syntax error in Python 3.
x = [dt.datetime(2003, 5, 1), dt.datetime(2008, 6, 1)]
# The y-values are the array loaded from the file.
df = np.load(r'file')
y = df
Adjust the date range to match the period you want to plot; `y` should be the array loaded from your file.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The loaded array supplies the y-values ...
y = np.load(r'c:\python27\abnormal.npy')

# ... and gets one daily timestamp per value, counted from start_date.
start_date = '2003-05-01'
x = pd.date_range(start=start_date, periods=len(y), freq='D')

# Plot the series as points.
plt.plot(x, y, '.')
plt.show()
If your input array does not contain ordered pairs such as `[(0,1), (1,1), (2,2)]` and only contains one set of numbers, e.g. `[1, 2, 3, 4]`, you need to create a set of x-coordinates. For a time series in days, you could try something like this:
import datetime
import numpy as np
import matplotlib.pyplot as plt
def getData(fileName, beginDate=datetime.date(2015, 6, 1)):
    """Load y-values from a ``.npy`` file and pair them with consecutive days.

    Args:
        fileName: Path of the file to load with ``numpy.load``; its values
            serve as the y-axis coordinates.
        beginDate: The date assigned to the first value. Defaults to
            2015-06-01, the date that was previously hard-coded.

    Returns:
        A tuple ``(x, y)`` where ``y`` is the loaded array and ``x`` is a
        list of ``datetime.date`` objects, one per value, each one day
        after the previous.
    """
    # Load the data file to serve as y-axis coordinates
    y = np.load(fileName)
    # For each y coordinate we need an x coordinate: the n-th value is placed
    # n days after beginDate.
    x = [beginDate + datetime.timedelta(days=n) for n in range(len(y))]
    return x, y
def plot(x, y):
    """Show a scatter plot of ``y`` against ``x``.

    The x tick labels are rotated by 90 degrees, the grid is switched on
    and the subplot margins are adjusted before the figure is displayed.
    """
    # Plot the data
    plt.figure()
    axes = plt.subplot2grid((1, 1), (0, 0), rowspan=1, colspan=1)
    axes.scatter(x, y)

    # Rotate the x tick labels to vertical.
    for tick_label in axes.xaxis.get_ticklabels():
        tick_label.set_rotation(90)
    axes.grid(True)

    plt.subplots_adjust(left=.10, bottom=.19, right=.93, top=.95,
                        wspace=.20, hspace=0)
    plt.show()
# Load the series from 'abnormal.npy' and display it.
x, y = getData('abnormal.npy')
plot(x, y)
I want to produce, in Python with matplotlib/pyplot:
- a bar chart with a fill color depending on the value,
- a legend color bar,
- while keeping module dependencies at a minimum.
Is there something simpler than:
import matplotlib.pyplot as plt
def color_gradient(val, beg_rgb, end_rgb, val_min=0, val_max=1):
    """Linearly interpolate an RGB colour for ``val``.

    Args:
        val: The value to map to a colour.
        beg_rgb: RGB triple returned when ``val == val_min``.
        end_rgb: RGB triple returned when ``val == val_max``.
        val_min: Lower end of the value range.
        val_max: Upper end of the value range.

    Returns:
        A 3-tuple with the interpolated red, green and blue components.
        When ``val_min == val_max`` the range is empty and the begin
        colour's components are returned instead of dividing by zero.
    """
    span = val_max - val_min
    # An empty range (e.g. all bar heights equal) has no meaningful position;
    # fall back to the start of the gradient.
    val_scale = (1.0 * val - val_min) / span if span else 0.0
    return tuple(beg_rgb[i] + val_scale * (end_rgb[i] - beg_rgb[i])
                 for i in range(3))
# -----------------------------------------------
# np.arange is used below, but this snippet only imported pyplot.
import numpy as np

x_lbls = ["09:00", "09:15", "10:10"]
y_vals = [7, 9, 5]
plt_idx = np.arange(len(x_lbls))
bar_wd = 0.35

# Gradient from grey (smallest value) to yellow (largest value).
grad_beg, grad_end = (0.5, 0.5, 0.5), (1, 1, 0)
col_list = [color_gradient(val, grad_beg, grad_end, min(y_vals), max(y_vals))
            for val in y_vals]

# One bar per value, each in its own gradient colour.
plt.bar(plt_idx, y_vals, color=col_list)
plt.xticks(plt_idx + bar_wd, x_lbls)
plt.show()
this is still missing the legend color bar
my solution in R with ggplot would be:
library(ggplot2)
# Ten time steps with the absolute values of standard-normal draws.
df = data.frame( time = 1:10, vals = abs(rnorm( n = 10)))
# Bar chart whose fill follows `vals`, on a gradient from grey to yellow.
ggplot( df, aes( x = time, y = vals, fill = vals)) +
geom_bar(stat = "identity") +
scale_fill_gradient(low="#888888",high="#FFFF00")
and produces the desired output:
I couldn't figure out how to get the colorbar to work without plotting something else and then clearing it, so it's not the most elegant solution.
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
y = np.array([1, 4, 3, 2, 7, 11])
# Bar colours: the hsv colormap evaluated at each value scaled by the maximum.
colors = cm.hsv(y / float(max(y)))
# Throw-away scatter plot, created only to obtain an object the colorbar can
# be built from; the statement order below matters.
plot = plt.scatter(y, y, c = y, cmap = 'hsv')
# Clear the figure so the scatter itself is not shown ...
plt.clf()
# ... then add the colorbar from the scatter and draw the actual bars.
plt.colorbar(plot)
plt.bar(range(len(y)), y, color = colors)
plt.show()
You can use Normalize and ScalarMappable without plotting a scatter. For example:
import matplotlib as mpl  # fixed: the `as` keyword was missing
import matplotlib.pyplot as plt
from matplotlib import cm

# Two stacked axes: ax1 for the bar chart, ax2 for the colorbar.
f, (ax1, ax2) = plt.subplots(2)
#ax1 --> plot here your bar chart

# Map the value range 0..1 onto the colormap and draw a standalone
# horizontal colorbar into ax2.
norm = mpl.colors.Normalize(vmin=0, vmax=1)
mpl.colorbar.ColorbarBase(ax2, cmap=cm.RdBu,
                          norm=norm,
                          orientation='horizontal')
Finally, add the desired format to the colorbar.