fill_between does not color adjacent intervals - python

I'm trying to fill between two adjacent intervals:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame({'Value': np.random.randint(10,size=10),
'Color': ['r','r','r','r','r','r','g','g','g','g']})
fig,ax = plt.subplots(1,1,figsize=(8,3))
ax.plot(df.Value)
ax.fill_between(df.index[df.Color == 'r'],
df.Value[df.Color == 'r'], color='r')
ax.fill_between(df.index[df.Color == 'g'],
df.Value[df.Color == 'g'], color='g')
Which gives me:
However, I would like to color the interval between index 5 and 6 as well (red).
How can I do this?

You not only want to fill where the Color is r, but also where the color changes from red to green. You may formulate this as a condition and use the where argument to fill_between.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ten random values; the first six rows are tagged 'r', the last four 'g'.
df = pd.DataFrame({'Value': np.random.randint(10,size=10),
'Color': ['r','r','r','r','r','r','g','g','g','g']})
fig,ax = plt.subplots(1,1,figsize=(8,3))
ax.plot(df.Value)
# True wherever the row is tagged red.
cond = df.Color == 'r'
# np.diff(...) == -1 flags every True -> False step of cond. The prepended 0
# shifts that flag onto the first non-red row, and XOR-ing it into cond switches
# that row on, so the red mask also covers the red-to-green transition point.
cond2 = cond ^ np.concatenate(([0],np.diff(cond.astype(int)) == -1))
ax.fill_between(df.index, df.Value, where=cond2, color='r')
# Green is filled over every row that is not tagged red.
ax.fill_between(df.index, df.Value, where=~cond, color='g')
plt.show()
While this works fine in this case, it will fail when single intervals are involved. In that case you cannot use fill_between.
A more general solution for coloring the area below a curve in the interval following a point could be the following. It builds a polygon of vertices for each run of points that satisfies the condition and draws those polygons as a collection in the respective color.
In this example a couple of test cases are shown:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
t1 = ['r','r','r','r','r','r','g','g','g','g']
t2 = ['r','g','r','r','g','r','g','g','r','g']
t3 = ['g','g','r','r','g','r','g','g','r','r']
t4 = ['g','r','g','g','r','g','g','g','g','r']
df = pd.DataFrame({'Value': np.random.randint(10,size=10),
'Color1': t1, "Color2": t2, 'Color3': t3, "Color4": t4})
def fill_intervals_post(x, y, color, cond, ax=None):
    """Fill between the curve and y=0 over each interval following a point tagged ``cond``.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the curve; both are cut with ``np.split`` at the same
        run boundaries.
    color : array-like
        One tag per point; compared element-wise against ``cond``.
    cond : str
        Tag to select. It is also handed to ``PolyCollection`` as the color.
    ax : matplotlib Axes, optional
        Axes to draw into; ``plt.gca()`` is used when omitted.
    """
    if ax is None:
        ax = plt.gca()
    # np.asarray so that a plain list of tags is compared element-wise too.
    cond1 = np.asarray(color) == cond
    steps = np.diff(cond1.astype(int))
    # First index of every matching run (False -> True step).
    inxstart = np.flatnonzero(steps == 1) + 1
    # One past the first non-matching point after a run (True -> False step),
    # so each slice keeps that point and the fill reaches up to it.
    inxend = np.flatnonzero(steps == -1) + 2
    # The two leading zeros add two empty pieces in front, so the matching runs
    # always sit at every second position of the split result.
    inx = np.concatenate(([0, 0], np.sort(np.concatenate((inxstart, inxend)))))
    # Even positions hold the matching runs when the first point matches,
    # odd positions otherwise.
    first = 0 if cond1[0] else 1
    xs = np.split(x, inx)[first::2]
    ys = np.split(y, inx)[first::2]
    verts = []
    for xi, yi in zip(xs, ys):
        # Polygon outline: along the curve, then back along y=0.
        xsi = np.concatenate((xi, xi[::-1]))
        ysi = np.concatenate((yi, np.zeros_like(yi)))
        verts.append(np.c_[xsi, ysi])
    p = PolyCollection(verts, color=cond)
    ax.add_collection(p)
fig, axes = plt.subplots(4, 1, figsize=(8, 9))
# One subplot per test column Color1..Color4: draw the curve in black, show the
# tag sequence as the title, then fill the red and the green intervals.
for i, ax in enumerate(axes):
    tags = df["Color{}".format(i+1)].values
    ax.plot(df.index.values, df.Value.values, color="k")
    ax.set_title(tags)
    fill_intervals_post(df.index.values, df.Value.values,
                        tags, "r", ax=ax)
    fill_intervals_post(df.index.values, df.Value.values,
                        tags, "g", ax=ax)
fig.tight_layout()
plt.show()

Related

How to filter data while drawing?

I have a dataframe which I plotted, as you can see in the figure and code below:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
df = pd.read_excel('nötronn.xlsx')
fig, ax = plt.subplots(figsize=(20,40))
ax1 = plt.subplot2grid((1,5), (0,0), rowspan=1, colspan = 1)
ax1.plot(df["N/F*10"], df['Depth'], color = "green", linewidth = 0.5)
ax1.set_xlabel("Porosity")
ax1.xaxis.label.set_color("green")
ax1.set_xlim(10, 50)
ax1.set_ylabel("Depth (m)")
ax1.tick_params(axis='x', colors="green")
ax1.spines["top"].set_edgecolor("green")
ax1.title.set_color('green')
ax1.set_xticks([10, 20, 30, 40, 50])
I want to filter data so that I can realize the differences better. I tried these:
z = np.polyfit(df["N/F*10"], df['Depth'], 2)
p = np.poly1d(z)
plt.plot(df["N/F*10"], p(df["N/F*10"]))
But it gives :LinAlgError: SVD did not converge in Linear Least Squares
How can I solve it? Thanks.
Output expectation:
This works!
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from statsmodels.nonparametric.smoothers_lowess import lowess
data = pd.read_excel('nötronn.xlsx')
# Keep only the rows with Depth above 21.5 and smooth them; lowess is called
# here without an explicit frac.
sub_data = data[data['Depth'] > 21.5]
result = lowess(sub_data['Eksi'], sub_data['Depth'].values)
# Column 0 of the lowess result is taken as x, column 1 as the smoothed y.
x_smooth = result[:,0]
y_smooth = result[:,1]
# NOTE(review): x_smooth / y_smooth are never plotted below - confirm whether
# the subset fit is still needed.
# Smooth the full data set with frac=0.01.
tot_result = lowess(data['Eksi'], data['Depth'].values, frac=0.01)
x_tot_smooth = tot_result[:,0]
y_tot_smooth = tot_result[:,1]
fig, ax = plt.subplots(figsize=(20, 8))
##ax.plot(data.depth.values, data['N/F*10'], label="raw")
ax.plot(x_tot_smooth, y_tot_smooth, label="lowess 1%", linewidth=3, color="g")
# The remaining two columns are plotted against the DataFrame index.
ax.plot(data['GR-V121B-ETi'])
ax.plot(data['Caliper'], linestyle = 'dashed')

Setting color of scatter points according to the first column in matplotlib

Why are all points hotpink? Where is the mistake? The desired result is that each point has a different colour according to the first column. Thanks
file:
fer00001.txt -30.5598 1 51183.7316 0.0 0.88884
her00034.txt -12.9113 1 50124.7613 0.0 0.93370
occ00043.txt -37.9350 1 50094.5721 0.0 0.94562
omc15564.txt -9.53485 1 51576.4297 0.0 0.56777
Code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
BVS, RV = np.loadtxt('file', unpack=True, usecols=[1, 5])
sp = np.loadtxt('file', unpack=True, dtype='str', usecols=[0])
kratke = [w[:3] for w in sp]
fig, ax = plt.subplots(figsize=[10,6.5])
string = [i.replace(',', ', ').replace('fer', 'FEROS').replace('her', 'HEROS').replace('occ', 'CES').replace('omc', 'RETICON') for i in kratke]
d = {'BVS': BVS, 'RV': RV, 'sp': string}
df = pd.DataFrame(data=d)
colors = {'FEROS': 'purple', 'HEROS': 'blue', 'CES': 'green', 'RETICON' : 'hotpink'}
for i in np.unique(df['sp']):
color = colors[i]
df1 = df[df['sp'] == i]
ax.scatter(df['BVS'], df['RV'], color=color, marker='o', s=6, label=i)
plt.show()
EDIT after advice
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
BVS, RV = np.loadtxt('file', unpack=True, usecols=[1, 5])
sp = np.loadtxt('file', unpack=True, dtype='str', usecols=[0])
kratke = [w[:3] for w in sp]
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=[10,6.5])
string = [i.replace(',', ', ').replace('fer', 'FEROS').replace('her', 'HEROS').replace('occ', 'CES').replace('omc', 'RETICON') for i in kratke]
d = {'BVS': BVS, 'RV': RV, 'sp': string}
df = pd.DataFrame(data=d)
colors = {'FEROS': 'purple', 'HEROS': 'blue', 'CES': 'green', 'RETICON' : 'hotpink'}
for i in np.unique(df['sp']):
color = colors[i]
df1 = df[df['sp'] == i]
ax1.scatter(df1['BVS'], df1['RV'], color=color, marker='o', s=6, label=i)
ax2.scatter(df1['BVS'][0:2], df1['RV'][0:2], color=color, marker='o', s=6, label=i)
plt.show()
Why does the slicing (`[0:2]`) not work? All points are displayed again. With my original data, no points are displayed at all.
The main issue is ax.scatter(df['BVS'], df['RV'], color=color, marker='o', s=6, label=i) should be ax.scatter(df1['BVS'], df1['RV'], color=color, marker='o', s=6, label=i)
df was plotted instead of df1
The same thing can be accomplished more easily with the following
import pandas as pd
import seaborn as sns # high level API for matplotlib
# load the columns with pandas instead of numpy
df = pd.read_csv('file', sep='\\s+', header=None, usecols=[0, 1, 5])
# name the columns
df.columns = ['sp', 'BVS', 'RV']
# use only the first 3 characters of sp (the instrument prefix of the file name)
df['sp'] = df['sp'].str[:3]
# explicit prefix -> instrument name mapping; pairing a sorted list of the
# unique prefixes with a list of names by position would silently mislabel the
# data as soon as one prefix is missing or a new one sorts in between
mapped = {'fer': 'FEROS', 'her': 'HEROS', 'occ': 'CES', 'omc': 'RETICON'}
# map sp to the new values
df['sp'] = df['sp'].map(mapped)
# display(df)
sp BVS RV
0 FEROS -30.55980 0.88884
1 HEROS -12.91130 0.93370
2 CES -37.93500 0.94562
3 RETICON -9.53485 0.56777
p = sns.scatterplot(data=df, x='BVS', y='RV', hue='sp')
p.legend(bbox_to_anchor=(1.01, 1.02), loc='upper left')

Integrating a histogram in a bootstrap simulation graph

I have a dataframe with 1000 simulations of a portfolio's returns. I am able to graph the simulations and do the respective histogram separately, but I have absolutely no idea how to merge them in order to resemble the following image:
please take this example of data in order to facilitate answers:
import numpy as np
import pandas as pd
def simulate_panel(T, N):
    """Simulate return paths.

    Args:
        T: number of daily periods (rows).
        N: number of simulated paths (columns).

    Returns:
        A (T, N) DataFrame of draws from ``np.random.normal(0, 0.01)``,
        indexed by daily dates starting 2021-02-18, with the columns
        named '1' .. 'N'.
    """
    dates = pd.date_range("20210218", periods=T, freq='D')
    columns = [str(i + 1) for i in range(N)]
    return pd.DataFrame(np.random.normal(0, 0.01, size=(T, N)), index=dates,
                        columns=columns)
df=(1+simulate_panel(1000,1000)).cumprod()
df.plot(figsize=(8,6),title=('Bootstrap'), legend=False)
Thank you very much in advance.
To color the curves via their last value, they can be drawn one-by-one. With a colormap and a norm, the value can be converted to the appropriate color. Using some transparency (alpha), the most visited positions will be colored stronger.
In a second subplot, a vertical histogram can be drawn, with the bars colored similarly.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def simulate_panel(T, N):
    """Simulate return paths.

    Args:
        T: number of daily periods (rows).
        N: number of simulated paths (columns).

    Returns:
        A (T, N) DataFrame of draws from ``np.random.normal(0, 0.01)``,
        indexed by daily dates starting 2021-02-18, with the columns
        named '1' .. 'N'.
    """
    dates = pd.date_range("20210218", periods=T, freq='D')
    columns = [str(i + 1) for i in range(N)]
    return pd.DataFrame(np.random.normal(0, 0.01, size=(T, N)), index=dates, columns=columns)
df = (1 + simulate_panel(1000, 1000)).cumprod()
fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True, figsize=(12, 4),
                               gridspec_kw={'width_ratios': [5, 1], 'wspace': 0})
# Transposed so that every row of data is one simulated path.
data = df.to_numpy().T
# Last value of every path; it drives both the line colors and the histogram.
final_values = data[:, -1]
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap is the supported
# way to look a colormap up by name.
cmap = plt.get_cmap('turbo')
norm = plt.Normalize(final_values.min(), final_values.max())
# Draw the paths one by one so each can take the color of its final value.
for row in data:
    ax1.plot(df.index, row, c=cmap(norm(row[-1])), alpha=0.1)
ax1.margins(x=0)
_, bin_edges, bars = ax2.hist(final_values, bins=20, orientation='horizontal')
# Color every bar by the value at the center of its bin.
for x0, x1, bar in zip(bin_edges[:-1], bin_edges[1:], bars):
    bar.set_color(cmap(norm((x0 + x1) / 2)))
ax2.tick_params(left=False)
plt.tight_layout()
plt.show()
You can use GridSpec to set up axes for line chart and the histogram next to each other:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np  # np.linspace is used for the bin edges below
# layout
fig = plt.figure()
gs = fig.add_gridspec(1, 2, wspace=0, width_ratios=[9, 1])
ax = gs.subplots(sharey=True)
# line chart
# z holds the last row of df (the final value of every path); it is rescaled
# to [0, 1] to pick one color per path
z = df.iloc[-1]
df.plot(figsize=(8,6), title=('Bootstrap'), legend=False, ax=ax[0],
        color=cm.RdYlBu_r((z - z.min()) / (z.max() - z.min())))
# histogram
n_bins = 20
# n_bins bins need n_bins + 1 edges
cnt, bins, patches = ax[1].hist(
    z, np.linspace(z.min(), z.max(), n_bins + 1),
    ec='k', orientation='horizontal')
colors = cm.RdYlBu_r((bins - z.min()) / (z.max() - z.min()))
# every bar takes the color of its lower bin edge; zip stops at the last bar
for p, bar_color in zip(patches, colors):
    p.set_color(bar_color)

Change figure position by mouse

I plotted two curves as shown in the following figure. I want to match the subplot curve (ax1) with one of the main curves by moving ax1 with the mouse in the X and Y directions. How can I do that?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
xl = pd.ExcelFile('Book2.xlsx')
column = xl.parse('Sheet1', names=['A', 'B', 'C'], header=None, index_col=False)
time = np.array(column['A'])
rate = np.array(column['B'])
fig, ax = plt.subplots()
tDd = np.arange(10e-4, 10e2, 1)
plt.plot()
b = np.arange(0, 1, 0.1)
index = 0
for i in tDd:
q_Dd = [np.exp(-tDd) if b == 0 else (1+b*tDd)**(-1/b) for b in b]
ax.loglog(tDd,q_Dd[index], 'b-')
if index <= 8: index += 1
plt.xlim(10e-4, 10e1)
plt.ylim(10e-4, 10e0)
ax1 = fig.add_axes([0.3, 0.3, 0.5, 0.5])
ax1.patch.set_alpha(0.5)
ax1.loglog(time,rate,'.')

How to update artists in scrollable, matplotlib and multiplot

I'm trying to create a scrollable multiplot based on the answer to this question:
Creating a scrollable multiplot with python's pylab
Lines created using ax.plot() are updating correctly, however I'm unable to figure out how to update artists created using vlines() and fill_between().
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.widgets import Slider
#create dataframes
dfs={}
for x in range(100):
col1=np.random.normal(10,0.5,30)
col2=(np.repeat([5,8,7],np.round(np.random.dirichlet(np.ones(3),size=1)*31)[0].tolist()))[:30]
col3=np.random.randint(4,size=30)
dfs[x]=pd.DataFrame({'col1':col1,'col2':col2,'col3':col3})
#create figure,axis,subplot
fig = plt.figure()
gs = gridspec.GridSpec(1,1,hspace=0,wspace=0,left=0.1,bottom=0.1)
ax = plt.subplot(gs[0])
ax.set_ylim([0,12])
#slider
frame=0
axframe = plt.axes([0.13, 0.02, 0.75, 0.03])
sframe = Slider(axframe, 'frame', 0, 99, valinit=0,valfmt='%d')
#plots
ln1,=ax.plot(dfs[0].index,dfs[0]['col1'])
ln2,=ax.plot(dfs[0].index,dfs[0]['col2'],c='black')
#artists
ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==5,facecolor='r',edgecolors='none',alpha=0.5)
ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==8,facecolor='b',edgecolors='none',alpha=0.5)
ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==7,facecolor='g',edgecolors='none',alpha=0.5)
ax.vlines(x=dfs[0]['col3'].index,ymin=0,ymax=dfs[0]['col3'],color='black')
#update plots
def update(val):
frame = np.floor(sframe.val)
ln1.set_ydata(dfs[frame]['col1'])
ln2.set_ydata(dfs[frame]['col2'])
ax.set_title('Frame ' + str(int(frame)))
plt.draw()
#connect callback to slider
sframe.on_changed(update)
plt.show()
This is what it looks like at the moment
I can't apply the same approach as for plot(), since the following produces an error message:
ln3,=ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==5,facecolor='r',edgecolors='none',alpha=0.5)
TypeError: 'PolyCollection' object is not iterable
This is what it's meant to look like on each frame
fill_between returns a PolyCollection, which expects a list (or several lists) of vertices upon creation. Unfortunately I haven't found a way to retrieve the vertices that were used to create the given PolyCollection, but in your case it is easy enough to create the PolyCollection directly (thereby avoiding the use of fill_between) and then update its vertices upon frame change.
Below a version of your code that does what you are after:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.widgets import Slider
from matplotlib.collections import PolyCollection
#create dataframes
dfs={}
for x in range(100):
col1=np.random.normal(10,0.5,30)
col2=(np.repeat([5,8,7],np.round(np.random.dirichlet(np.ones(3),size=1)*31)[0].tolist()))[:30]
col3=np.random.randint(4,size=30)
dfs[x]=pd.DataFrame({'col1':col1,'col2':col2,'col3':col3})
#create figure,axis,subplot
fig = plt.figure()
gs = gridspec.GridSpec(1,1,hspace=0,wspace=0,left=0.1,bottom=0.1)
ax = plt.subplot(gs[0])
ax.set_ylim([0,12])
#slider
frame=0
axframe = plt.axes([0.13, 0.02, 0.75, 0.03])
sframe = Slider(axframe, 'frame', 0, 99, valinit=0,valfmt='%d')
#plots
ln1,=ax.plot(dfs[0].index,dfs[0]['col1'])
ln2,=ax.plot(dfs[0].index,dfs[0]['col2'],c='black')
##additional code to update the PolyCollections
val_r = 5
val_b = 8
val_g = 7
def update_collection(collection, value, frame = 0):
    """Point ``collection`` at the rectangle covering the rows of a frame where col2 == value.

    Args:
        collection: object with a ``set_verts`` method (a PolyCollection here).
        value: the col2 value to highlight; the rectangle spans
            value - 0.5 .. value + 0.5 vertically.
        frame: key into the module-level ``dfs`` dict of DataFrames.

    The rectangle runs horizontally from the smallest to the largest index
    whose col2 equals ``value``. When no row matches, an empty (0, 2) vertex
    array is set instead.
    """
    xs = np.array(dfs[frame].index)
    ys = np.array(dfs[frame]['col2'])
    matching_x = xs[ys == value]
    # Test for "no match" explicitly instead of waiting for np.min to raise on
    # an empty array: no other error can then reach set_verts with verts unset.
    if matching_x.size:
        minx = np.min(matching_x)
        maxx = np.max(matching_x)
        miny = value-0.5
        maxy = value+0.5
        verts = np.array([[minx,miny],[maxx,miny],[maxx,maxy],[minx,maxy]])
    else:
        verts = np.zeros((0,2))
    collection.set_verts([verts])
#artists
##ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==5,facecolor='r',edgecolors='none',alpha=0.5)
reds = PolyCollection([],facecolors = ['r'], alpha = 0.5)
ax.add_collection(reds)
update_collection(reds,val_r)
##ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==8,facecolor='b',edgecolors='none',alpha=0.5)
blues = PolyCollection([],facecolors = ['b'], alpha = 0.5)
ax.add_collection(blues)
update_collection(blues, val_b)
##ax.fill_between(dfs[0].index,y1=dfs[0]['col2']-0.5,y2=dfs[0]['col2']+0.5,where=dfs[0]['col2']==7,facecolor='g',edgecolors='none',alpha=0.5)
greens = PolyCollection([],facecolors = ['g'], alpha = 0.5)
ax.add_collection(greens)
update_collection(greens, val_g)
ax.vlines(x=dfs[0]['col3'].index,ymin=0,ymax=dfs[0]['col3'],color='black')
#update plots
def update(val):
    """Slider callback: redraw both lines and the three rectangles for the selected frame.

    ``val`` is supplied by the slider but not used; the frame number is read
    from ``sframe.val`` instead.
    """
    # The slider reports a float; floor it and convert to int so the dict key
    # is an actual frame number.
    frame = int(np.floor(sframe.val))
    ln1.set_ydata(dfs[frame]['col1'])
    ln2.set_ydata(dfs[frame]['col2'])
    ax.set_title('Frame ' + str(frame))
    ##updating the PolyCollections:
    update_collection(reds,val_r, frame)
    update_collection(blues,val_b, frame)
    update_collection(greens,val_g, frame)
    plt.draw()
#connect callback to slider
sframe.on_changed(update)
plt.show()
Each of the three PolyCollections (reds, blues, and greens) has only four vertices (the corners of its rectangle), which are determined from the given data (this is done in update_collection). The result looks like this:
Tested in Python 3.5
Your error
TypeError: 'PolyCollection' object is not iterable
can be avoided by removing the comma after l3:
l3 = ax.fill_between(xx, y1, y2, **kwargs)
The return value is a PolyCollection, you need to update its vertices during the update() function. An alternative to the other answer posted here is to make fill_between() give you a new PolyCollection, and then get its vertices and use them to update those of l3:
def update(val):
    """Refresh ``l3`` by copying the geometry of a freshly computed fill_between."""
    # Let fill_between build the new polygons on a throw-away collection ...
    dummy_l3 = ax.fill_between(xx, y1, y2, **kwargs)
    paths = dummy_l3.get_paths()
    # ... read them through the public Path attributes (not _vertices/_codes) ...
    verts = [path.vertices for path in paths]
    codes = [path.codes for path in paths]
    # ... and discard the helper again before handing the geometry to l3.
    dummy_l3.remove()
    l3.set_verts_and_codes(verts, codes)
    plt.draw()
The above code does not run for me; however, to refresh fill_between the following works for me
%matplotlib inline
import numpy as np
from IPython import display
import matplotlib.pyplot as plt
import time
hdisplay = display.display("", display_id=True)
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
x = np.linspace(0,1,100)
ax.set_title("Test")
ax.set_xlim()
y = np.random.random(size=(100))
dy = 0.1
l = ax.plot(x,y,":",color="red")
b = ax.fill_between( x, y-dy, y+dy, color="red", alpha=0.2 )
hdisplay.update(fig)
for i in range(5):
    time.sleep(1)
    ax.set_title("Test %ld" % i)
    y = np.random.random(size=(100))
    l[0].set_ydata( y )
    # The band is not edited in place: drop the old fill_between collection
    # and draw a new one from the new y values.
    b.remove()
    b = ax.fill_between( x, y-dy, y+dy, color="red", alpha=0.2 )
    plt.draw()
    # Push the redrawn figure to the existing notebook display handle.
    hdisplay.update(fig)
plt.close(fig)

Categories

Resources