i created a dataframe with random columns and values. now i am trying to interate with an loop over "time" window" (maybe there is a more elegant solution than mine). i try to plot the calculated correlations in a heatmap and then interate furhter and show the next result in the same figure. Like this
https://datasoaring.blogspot.com/2018/07/gdp-correlation-matrix-top-10-economies.html
The current code plot a new figure for each correlation...
Thanks for ideas and help!
Creates Dataframe
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import time
import seaborn as sns
sns.set_style('white')
plt.style.use('dark_background')
index = pd.date_range('01/01/2010',periods=num_days, freq='D')
data_KW = pd.DataFrame(np.random.randint(0,250,size=(250, 10)), columns=list('ABCDEFGHIJ'), index=index)
data_KW.head()
interate and plot (wrong :))
# Calculate the lenght of the Dataframe
end = 10 #len(data_KW.index)
# is the variable for the rolling window
var_start = 0
var_end = 5
#Set up the matplotlib figure
f, ax = plt.subplots(figsize=(5, 5))
while var_end <= end:
window = data_KW.iloc[var_start : var_end]
# Compute the correlation matrix
corr = window.corr()
# Generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0,
square=True, linewidths=.5, cbar_kws={"shrink": .5})
#plt.pause(3)
plt.show()
time.sleep(2)
#time.sleep(5)
var_start = var_start + 1
var_end = var_end + 1
print(var_start)
Related
I'm trying to plot a series of frequency spectra in a 3D space using PolyCollection. My goal is to set "facecolors" as a gradient, i.e., the higher the magnitude, the lighter the color.
Please see this image for reference (I am not looking for the fancy design, just the gradients).
I tried to use the cmap argument of the PollyCollection, but I was unsuccessful.
I came this far with the following code adapted from here:
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
from mpl_toolkits.mplot3d import axes3d
import numpy as np
from scipy.ndimage import gaussian_filter1d
def plot_poly(magnitudes):
freq_data = np.arange(magnitudes.shape[0])[:,None]*np.ones(magnitudes.shape[1])[None,:]
mag_data = magnitudes
rad_data = np.linspace(1,magnitudes.shape[1],magnitudes.shape[1])
verts = []
for irad in range(len(rad_data)):
xs = np.concatenate([[freq_data[0,irad]], freq_data[:,irad], [freq_data[-1,irad]]])
ys = np.concatenate([[0],mag_data[:,irad],[0]])
verts.append(list(zip(xs, ys)))
poly = PolyCollection(verts, edgecolor='white', linewidths=0.5, cmap='Greys')
poly.set_alpha(.7)
fig = plt.figure(figsize=(24, 16))
ax = fig.add_subplot(111, projection='3d', proj_type = 'ortho')
ax.add_collection3d(poly, zs=rad_data, zdir='y')
ax.set_xlim3d(freq_data.min(), freq_data.max())
ax.set_xlabel('Frequency')
ax.set_ylim3d(rad_data.min(), rad_data.max())
ax.set_ylabel('Measurement')
ax.set_zlabel('Magnitude')
# Remove gray panes and axis grid
ax.xaxis.pane.fill = False
ax.xaxis.pane.set_edgecolor('white')
ax.yaxis.pane.fill = False
ax.yaxis.pane.set_edgecolor('white')
ax.zaxis.pane.fill = False
ax.zaxis.pane.set_edgecolor('white')
ax.view_init(50,-60)
plt.show()
sample_data = np.random.rand(2205, 4)
sample_data = gaussian_filter1d(sample_data, sigma=10, axis=0) # Just to smoothe the curves
plot_poly(sample_data)
Besides the missing gradients I am happy with the output of the code above.
I want to create a heatmap out of 3 1dimensional arrays. Something that looks like this:
Up to this point, I was only able to create a scatter plot where the markers have a different color and marker size depending on the third value:
My code:
xf = np.random.rand(1000)
yf = np.random.rand(1000)
zf = 1e5*np.random.rand(1000)
ms1 = (zf).astype('int')
from matplotlib.colors import LinearSegmentedColormap
# Remove the middle 40% of the RdBu_r colormap
interval = np.hstack([np.linspace(0, 0.4), np.linspace(0.6, 1)])
colors = plt.cm.RdBu_r(interval)
cmap = LinearSegmentedColormap.from_list('name', colors)
col = cmap(np.linspace(0,1,len(ms1)))
#for i in range(len(ms1)):
plt.scatter(xf, yf, c=zf, s=5*ms1/1e4, cmap=cmap,alpha=0.8)#, norm =matplotlib.colors.LogNorm())
ax1 =plt.colorbar(pad=0.01)
is giving me this result:
Any idea how I could make it look like the first figure?
Essentially what I want to do is find the average of the z value for groups of the x and y arrays
I think the functionality you are looking for is provided by scipy.stats.binned_statistic_2d. You can use it to organize values of xf and yf arrays into 2-dimensional bins, and compute the mean of zf values in each bin:
import numpy as np
from scipy import stats
np.random.seed(0)
xf = np.random.rand(1000)
yf = np.random.rand(1000)
zf = 1e5 * np.random.rand(1000)
means = stats.binned_statistic_2d(xf,
yf,
values=zf,
statistic='mean',
bins=(5, 5))[0]
Then you can use e.g. seaborn to plot a heatmap of the array of mean values:
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(10, 8))
sns.heatmap(means,
cmap="Reds_r",
annot=True,
annot_kws={"fontsize": 16},
cbar=True,
linewidth=2,
square=True)
plt.show()
This gives:
I want to automatize an imshow degrading figure with python3. I would like to give a data frame and this to be plot no matter how many columns are given.
I tried this:
vmin = 3.5
vmax = 6
fig, axes = plt.subplots(len(list(df.columns)),1)
for i,j in zip(list(df.columns),range(1,len(list(df.columns))+1)):
df = df.sort_values([i], ascending = False)
y = df[i].tolist()
gradient = [y,y]
plt.imshow(gradient, aspect='auto', cmap=plt.get_cmap('hot_r'), vmin=vmin, vmax=vmax)
axes = plt.subplot(len(list(df.columns)),1,j)
sm = plt.cm.ScalarMappable(cmap=plt.get_cmap('hot_r'),norm=plt.Normalize(vmin,vmax))
sm._A = []
plt.colorbar(sm,ax=axes)
plt.show()
My problem is that the first set of data (first column of the df) is never showed. Also the map is not where I want it to be. This is exactly what I get:
But this is what I want:
You shouldn't use plt.subplot if you already have created your subplots via plt.subplots.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
f = lambda x, s: x*np.exp(-x**2/s)/2
df = pd.DataFrame({"A" : f(np.linspace(0,50,600),70)+3.5,
"B" : f(np.linspace(0,50,600),110)+3.5,
"C" : f(np.linspace(0,50,600),150)+3.5,})
vmin = 3.5
vmax = 6
fig, axes = plt.subplots(len(list(df.columns)),1)
for col, ax in zip(df.columns,axes.flat):
df = df.sort_values([col], ascending = False)
y = df[col].values
gradient = [y,y]
im = ax.imshow(gradient, aspect='auto',
cmap=plt.get_cmap('hot_r'), vmin=vmin, vmax=vmax)
# Since all images have the same vmin/vmax, we can take any of them for the colorbar
fig.colorbar(im, ax=axes)
plt.show()
I would like to add cross (X) on heatmap cells (depending on significance level, but the question is on adding the X).
Like in R-language (sig.level = XXX).
See the Python and R code used and the corresponding output images.
Thank you for your help.
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, center=0, vmin=-1, vmax=1, square=True, linewidths=0.5, fmt=".2f",
cbar_kws={"shrink": .65, "orientation": "horizontal", "ticks":np.arange(-1, 1+1, 0.2)},
annot = True, annot_kws={"weight": 'bold', "size":15})
corrplot(cor(subset (wqw, select =
c(fixed.acidity:quality,ratio.sulfur.dioxide))),
# compute the p matrix
p.mat = cor.mtest(subset
(wqw, select = c(fixed.acidity:quality,ratio.sulfur.dioxide))),
# significance level 0.01
sig.level = 0.01,
# Method to display : color (could be corcle, ...)
method = "color",
# color palette
col = colorRampPalette(c("#BB4444", "#EE9988",
"#FFFFFF", "#77AADD", "#4477AA"))(200),
)
```
The easy solution is to add a scatter plot with an X-shaped marker to cross out the unwanted cells.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
data = np.random.rand(10,10)
mask = np.zeros_like(data)
mask[np.triu_indices_from(mask)] = True
data_masked = np.ma.array(data, mask=mask)
fig, ax = plt.subplots()
im = ax.imshow(data_masked, cmap="YlGnBu", origin="upper")
fig.colorbar(im)
ax.scatter(*np.argwhere(data_masked.T < 0.4).T, marker="x", color="black", s=100)
plt.show()
The drawback of this is that the markersize (s) is independent of the number of cells and needs to be adjusted for different figure sizes.
An alternative is hence to draw some lines (an X are two crossed lines) at the respective positions. Here we create a function crossout(points, ax=None, scale=1, **kwargs), where scale is the percentage the lines shall take from each cell.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
def crossout(points, ax=None, scale=1, **kwargs):
ax = ax or plt.gca()
l = np.array([[[1,1],[-1,-1]]])*scale/2.
r = np.array([[[-1,1],[1,-1]]])*scale/2.
p = np.atleast_3d(points).transpose(0,2,1)
c = LineCollection(np.concatenate((l+p,r+p), axis=0), **kwargs)
ax.add_collection(c)
return c
data = np.random.rand(10,10)
mask = np.zeros_like(data)
mask[np.triu_indices_from(mask)] = True
data_masked = np.ma.array(data, mask=mask)
fig, ax = plt.subplots()
im = ax.imshow(data_masked, cmap="YlGnBu", origin="upper")
fig.colorbar(im)
crossout(np.argwhere(data_masked.T < 0.4), ax=ax, scale=0.8, color="black")
plt.show()
For scale=0.8 this looks like
Note that for a pcolormesh plot or a seaborn heatmap (which uses pcolormesh internally), one would need to add 0.5 to the data, i.e.
np.argwhere(data_masked.T < 0.4)+0.5
I need to display values of my matrix using matshow.
However, with the code I have now I just get two matrices - one with values and other colored.
How do I impose them? Thanks :)
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
min_val, max_val = 0, 15
for i in xrange(15):
for j in xrange(15):
c = intersection_matrix[i][j]
ax.text(i+0.5, j+0.5, str(c), va='center', ha='center')
plt.matshow(intersection_matrix, cmap=plt.cm.Blues)
ax.set_xlim(min_val, max_val)
ax.set_ylim(min_val, max_val)
ax.set_xticks(np.arange(max_val))
ax.set_yticks(np.arange(max_val))
ax.grid()
Output:
You need to use ax.matshow not plt.matshow to make sure they both appear on the same axes.
If you do that, you also don't need to set the axes limits or ticks.
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
min_val, max_val = 0, 15
intersection_matrix = np.random.randint(0, 10, size=(max_val, max_val))
ax.matshow(intersection_matrix, cmap=plt.cm.Blues)
for i in xrange(15):
for j in xrange(15):
c = intersection_matrix[j,i]
ax.text(i, j, str(c), va='center', ha='center')
Here I have created some random data as I don't have your matrix. Note that I had to change the ordering of the index for the text label to [j,i] rather than [i][j] to align the labels correctly.
In Jupyter notebooks this is also possible with DataFrames and Seaborn:
import numpy as np
import seaborn as sns
import pandas as pd
min_val, max_val = 0, 15
intersection_matrix = np.random.randint(0, 10, size=(max_val, max_val))
cm = sns.light_palette("blue", as_cmap=True)
x=pd.DataFrame(intersection_matrix)
x=x.style.background_gradient(cmap=cm)
display(x)