Related
I'm getting a blank plot when using the code and data file below. Could you please let me know what's wrong with the data file or the code?
import numpy as np
import matplotlib.pyplot as plt
# Load space-separated rows; columns 0, 1 and 2 are read as latitude,
# longitude and the value to contour.
data = np.genfromtxt('file1.txt', delimiter=' ')
lats = data[:, 0]
lons = data[:, 1]
values = data[:, 2]

# Index every sample by its position among the unique latitudes/longitudes
# so it can be dropped onto a regular grid.
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
xre, yre = np.meshgrid(lon_uniq, lat_uniq)

# Grid cells that receive no sample keep their NaN fill value.
zre = np.full(xre.shape, np.nan)
zre[lat_idx, lon_idx] = values
print(zre)

fig, (ax1) = plt.subplots(1, 1, figsize=(10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated")
plt.show()
test.txt file --
1 2 3
4 5 6
7 8 9
10 11 12
Your program itself has no problem.
There were NaN values in your matrix, in which case the graph will not be drawn.
It makes no sense to have missing values in a matrix and try to draw those points.
Once those values are replaced, the 'contourf' command can fit the values of X, Y, and Z, and the graph is drawn.
See this :
import numpy as np
import matplotlib.pyplot as plt
# Load space-separated rows; columns 0, 1 and 2 are read as latitude,
# longitude and the value to contour.
data = np.genfromtxt('test.txt', delimiter=' ')
lats = data[:, 0]
lons = data[:, 1]
values = data[:, 2]

# Index every sample by its position among the unique latitudes/longitudes.
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
xre, yre = np.meshgrid(lon_uniq, lat_uniq)

# Fill the grid with zeros instead of np.nan so that every cell holds a
# drawable number.  The fill value is the float 0.0 on purpose: an integer
# fill value would create an integer array and silently truncate
# fractional sample values on assignment.
zre = np.full(xre.shape, 0.0)
zre[lat_idx, lon_idx] = values
print(zre)

fig, (ax1) = plt.subplots(1, 1, figsize=(10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated")
plt.show()
Output:
I am trying to create a figure that is a dendrogram on top of a scatterplot, where the ends of the leaves on the dendrogram match up with the dots on the scatterplot, which in turn match up with the tick labels below. I have this working, but for some reason the tick labels appear twice. The labels in red and green are the ones I'm trying to keep.
This is my code:
import pandas as pd
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as sch
import numpy as np
import json
import random
def scatter_and_dendrogram(df, colors, wn='', label_x=False):
    '''Draw a dendrogram with a thin strip of leaf markers right below it.

    Args:
        df (Pandas DataFrame): similarity matrix
        colors (list of strs): list of colors (kept for compatibility; the
            drawing code does not use it)
        wn (str): window name, also used as the title of the dendrogram
        label_x (bool): whether or not to label x axis

    Returns: None
    '''
    norm = plt.Normalize(1, 4)

    # Observation vectors for the linkage: each row of df from column 1 on.
    # NOTE(review): column 0 is skipped, exactly as in the original loop --
    # confirm that this is intended for a square similarity matrix.
    dist_matrix = [list(df.iloc[i, 1:]) for i in range(len(df))]
    X = np.asarray(dist_matrix)
    Z = sch.linkage(X, 'ward')
    sch.set_link_color_palette(['b'])

    # plt.subplots creates the figure itself, so no separate plt.figure()
    # call is needed (it only opened an extra, empty figure).
    fig, axs = plt.subplots(2, 1, sharex='col', sharey='row',
                            gridspec_kw={'width_ratios': [1],
                                         'height_ratios': [30, 1],
                                         'hspace': 0, 'wspace': 0})
    (ax1, ax2) = axs
    dendro = sch.dendrogram(Z=Z, p=3, ax=ax1)

    # A link end whose height is 0 sits on a leaf; collect the x position of
    # every such end so a marker can be drawn underneath it.
    leaf_x = []
    for ic, dc in zip(dendro['icoord'], dendro['dcoord']):
        zero_count = dc.count(0)
        if zero_count == 2:
            leaf_x.append(ic[0])
            leaf_x.append(ic[-1])
        elif zero_count == 1:
            leaf_x.append(ic[dc.index(0)])
    leaf_y = [-0.1] * len(leaf_x)
    ax2.scatter(leaf_x, leaf_y, s=10, norm=norm, alpha=0.7)

    # The window title lives on the figure manager; the canvas-level
    # set_window_title shortcut is not available in newer Matplotlib.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(wn)

    ax1.set_yticklabels([])
    ax1.set_xticklabels([])
    ax2.set_yticklabels([])
    ax2.set_xticklabels([])
    if label_x:
        # NOTE(review): the label alphabet is hard-coded for four leaves.
        letters = list('ABCD')
        labels = [letters[ind] for ind in dendro['leaves']]
        c1 = '#ff0033'  # red
        c2 = '#006600'  # green
        xlbls = ax2.set_xticklabels(labels, fontsize=11, linespacing=3)
        # The two axes share their x axis, so the labels set on ax2 would
        # also be drawn under ax1; hide the upper copy so each label
        # appears only once.
        ax1.tick_params(axis='x', labelbottom=False)
        for lbl in xlbls:
            t = lbl.get_text()
            c = c2
            if letters.index(t) < 2:
                c = c1
            print(c)
            lbl.set_color(c)
    ax1.set_title(wn)
    ax1.set_ylabel('Aggregation Criterion', fontsize=15)
    ax2.set_xlabel('Articles', fontsize=15)
    plt.show()
# Build a random symmetric matrix over the four labels (diagonal left
# empty, then zero-filled) and plot it with x-axis labels enabled.
l = ['A', 'B', 'C', 'D']
df = pd.DataFrame(index=l, columns=l)
size = len(l)
for i in range(size - 1):
    for j in range(i + 1, size):
        r = random.randint(0, 10)
        # Mirror the value across the diagonal.
        df.iloc[i, j] = df.iloc[j, i] = r
df.fillna(0, inplace=True)
print(df)
wn = 'Set C'
scatter_and_dendrogram(df, l, wn, label_x=True)
This is what it looks like:
According to matplotlib.pyplot.subplots about sharex and sharey
When subplots have a shared x-axis along a column, only the x tick
labels of the bottom subplot are created.
Similarly, when subplots have a shared y-axis along a row, only the y tick labels of the first column subplot are created.
To later turn other subplots' ticklabels on, use tick_params.
You need to add ax1.tick_params(axis='x', labelbottom=False) under xlbls = ax2.set_xticklabels.
Besides, the fig = plt.figure() call is redundant because plt.subplots creates its own figure, so remove it.
I'm trying to fill between two adjacent intervals:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ten random values; the first six are tagged red, the last four green.
colour_flags = ['r'] * 6 + ['g'] * 4
df = pd.DataFrame({'Value': np.random.randint(10, size=10),
                   'Color': colour_flags})

fig, ax = plt.subplots(1, 1, figsize=(8, 3))
ax.plot(df.Value)

# Shade the area under the curve separately for each colour tag, using
# only the points that carry that tag.
for shade in ('r', 'g'):
    tagged = df.Color == shade
    ax.fill_between(df.index[tagged], df.Value[tagged], color=shade)
Which gives me:
However, I would like to color the interval between index 5 and 6 as well (red).
How can I do this?
You not only want to fill where the Color is r, but also where the color changes from red to green. You may formulate this as a condition and use the where argument to fill_between.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Ten random values; the first six are tagged red, the last four green.
colour_flags = ['r'] * 6 + ['g'] * 4
df = pd.DataFrame({'Value': np.random.randint(10, size=10),
                   'Color': colour_flags})

fig, ax = plt.subplots(1, 1, figsize=(8, 3))
ax.plot(df.Value)

cond = df.Color == 'r'
# True at position k + 1 whenever the tag switches from red (at k) to
# another colour (at k + 1); the leading 0 keeps the lengths aligned.
red_ends = np.diff(cond.astype(int)) == -1
after_red = np.concatenate(([0], red_ends))
# Red region = red points plus the first point after each red run.
cond2 = cond ^ after_red

ax.fill_between(df.index, df.Value, where=cond2, color='r')
ax.fill_between(df.index, df.Value, where=~cond, color='g')
plt.show()
While this works fine in this case, it will fail when single intervals are involved. In that case you cannot use fill_between.
A more general solution for colorizing the area below a curve in the interval following a point could be the following. This creates a polygon of vertices based on the condition and plots a collection of those polygons in the respective color.
In this example a couple of test cases are shown:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
# Four colour-tag sequences used as test cases, one character per sample.
t1 = list('rrrrrrgggg')
t2 = list('rgrrgrggrg')
t3 = list('ggrrgrggrr')
t4 = list('grggrggggr')

# One shared column of random values plus one tag column per test case.
columns = {'Value': np.random.randint(10, size=10)}
columns['Color1'] = t1
columns["Color2"] = t2
columns['Color3'] = t3
columns["Color4"] = t4
df = pd.DataFrame(columns)
def fill_intervals_post(x, y, color, cond, ax=None):
    """Shade the area between the curve and y = 0 after each matching point.

    Every run of consecutive samples whose entry in ``color`` equals
    ``cond`` becomes one polygon that also includes the first sample after
    the run, so the interval following the last matching point is filled.

    x, y   -- arrays with the sample positions and values
    color  -- array with one tag per sample
    cond   -- tag to select; also passed to the collection as its colour
    ax     -- axes to draw on; the current axes are used when omitted
    """
    if not ax:
        ax = plt.gca()

    matches = color == cond
    steps = np.diff(matches.astype(int))
    positions = np.arange(len(color) - 1)
    # Slice boundaries: a run opens at the first matching sample and closes
    # one sample past the first non-matching one.
    run_open = positions[steps == 1] + 1
    run_close = positions[steps == -1] + 2
    boundaries = np.sort(np.concatenate((run_open, run_close)))
    split_at = np.concatenate(([0, 0], boundaries))

    # After splitting, the wanted pieces are every second one; which parity
    # depends on whether the very first sample matches.
    first_piece = (~matches).astype(int)[0]
    x_pieces = np.split(x, split_at)[first_piece::2]
    y_pieces = np.split(y, split_at)[first_piece::2]

    # Each polygon runs along the curve and returns along the zero line.
    verts = [
        np.c_[np.concatenate((xi, xi[::-1])),
              np.concatenate((yi, np.zeros_like(yi)))]
        for xi, yi in zip(x_pieces, y_pieces)
    ]
    p = PolyCollection(verts, color=cond)
    ax.add_collection(p)
# One panel per test case: black curve, the tag sequence as title, and the
# red and green intervals filled underneath.
fig, axes = plt.subplots(4, 1, figsize=(8, 9))
x_values = df.index.values
y_values = df.Value.values
for number, ax in enumerate(axes, start=1):
    tags = df["Color{}".format(number)].values
    ax.plot(x_values, y_values, color="k")
    ax.set_title(tags)
    for shade in ("r", "g"):
        fill_intervals_post(x_values, y_values, tags, shade, ax=ax)
fig.tight_layout()
plt.show()
I have a GridSpec-defined layout with two subgrids; one is supposed to include a colorbar:
import pylab as plt
import numpy as np
# Outer layout: a wide cell for the plots and a narrow one for the colorbar.
gs_outer = plt.GridSpec(1, 2, width_ratios=(10, 1))
# 2 x 3 grid of plot cells nested inside the wide outer cell.
gs_inner = plt.matplotlib.gridspec.GridSpecFromSubplotSpec(2, 3, gs_outer[0])

ax = []
# range works on both Python 2 and 3 (xrange exists only on Python 2).
for cell in range(6):
    panel = plt.subplot(gs_inner[cell])
    plt.setp(panel.get_xticklabels(), visible=False)
    plt.setp(panel.get_yticklabels(), visible=False)
    ax.append(panel)

# Last entry: the axes reserved for the colorbar.
ax.append(plt.subplot(gs_outer[1]))
plt.show()
I'd now like to get for the left part a row-wise labeling like this:
I tried to add another GridSpec into the GridSpec, but that did not work out:
import pylab as plt
import numpy as np
nested_grid = plt.matplotlib.gridspec.GridSpecFromSubplotSpec

fig = plt.figure()
# Outer layout: wide plot area plus a narrow column.
gs_outer = plt.GridSpec(nrows=1, ncols=2, width_ratios=(10, 1))
# Three stacked cells inside the wide area.
gs_medium = nested_grid(nrows=3, ncols=1, subplot_spec=gs_outer[0])

ax_title0 = plt.subplot(gs_medium[0])
ax_title0.set_title('Test!')

# A 1 x 3 grid nested into the same cell that already holds ax_title0.
gs_row1 = nested_grid(nrows=1, ncols=3, subplot_spec=gs_medium[0])
ax00 = plt.subplot(gs_row1[0])  # toggle this line to see the effect
plt.show()
Adding the ax00 = plt.subplot... line seems to erase the previously created axis
Following CT Zhu's comment, I came up with the following answer (I don't really like it, but it seems to work):
import pylab as plt
import numpy as np
fig = plt.figure()
rows = 2
cols = 3
row_fraction = 9
# Height of one data row relative to a title row of height 1.
row_size = row_fraction / float(rows)

gs_outer = plt.GridSpec(1, 2, width_ratios=(9, 1))
# Every logical row takes two grid rows: a thin title row followed by the
# data row, hence rows * 2 grid rows and the alternating height ratios.
gs_plots = plt.matplotlib.gridspec.GridSpecFromSubplotSpec(
    rows * 2, cols, subplot_spec=gs_outer[0],
    height_ratios=rows * [1, row_size])

# Create title axes: one per logical row, spanning all columns of the
# even-numbered grid rows.
title_ax = [plt.subplot(gs_plots[2 * row, :]) for row in range(rows)]

# Create data axes on the odd-numbered grid rows.  The first one is created
# stand-alone; all later ones share their x and y axes with it.
ax = []
for row in range(rows):
    row_index = 2 * row + 1
    for col in range(cols):
        if ax:
            ax.append(plt.subplot(gs_plots[row_index, col],
                                  sharex=ax[0], sharey=ax[0]))
        else:
            ax.append(plt.subplot(gs_plots[row_index, col]))

# Delete boxes and markers from title axes
for ta in title_ax:
    ta.set_frame_on(False)
    ta.xaxis.set_visible(False)
    ta.yaxis.set_visible(False)

# Add labels to title axes ('Row 1', 'Row 2', ... for however many rows).
for number, ta in enumerate(title_ax, start=1):
    plt.sca(ta)
    plt.text(
        0.5, 0.5, 'Row {}'.format(number),
        horizontalalignment='center', verticalalignment='center')

# Add common colorbar
gs_cb = plt.matplotlib.gridspec.GridSpecFromSubplotSpec(
    1, 1, subplot_spec=gs_outer[1])
ax.append(plt.subplot(gs_cb[:, :]))
Of course, the labeling and tick labels could be improved, but how to achieve that is likely already explained on SO.
Let's define an example grid pltgrid:
# 2 x 3 grid of equally sized cells with extra spacing between them.
pltgrid = gridspec.GridSpec(
    nrows=2, ncols=3,
    height_ratios=[1, 1], width_ratios=[1, 1, 1],
    hspace=0.6, wspace=0.3)
Before your for loop, you can define a list ax using map:
# Placeholder names 'ax0' ... 'ax6'; the entries are overwritten with real
# axes objects later on.
num = list(range(7))
ax = list(map('ax{}'.format, num))
You may have a list plotnames containing the names. As an example, I'll plot a normal distribution Q-Q plot for each i in the for loop:
# range works on both Python 2 and 3 (xrange exists only on Python 2).
for i in range(6):
    # Figure.add_subplot is the method name; 'fig.add.subplot' would raise
    # an AttributeError.
    ax[i] = fig.add_subplot(pltgrid[i])
    # Normal Q-Q plot of x drawn into this subplot.
    res = stats.probplot(x, dist="norm", plot=ax[i])
    # set title for subplot using existing 'plotnames' list
    ax[i].set_title(plotnames[i])
I have this graph so far, it's kind of ugly. Each type of marker is an accuracy for a type of algorithm.
There are two problems with this:
I'd like there to be space between the content and the axes, but only show ticks for y in [0, 1] and have the x axis show no negative values (no such thing as negative time).
I'd like to display the x values as their log transform, but keep the original values on the ticks so you can see the actual values.
For #1 I tried playing with xticks as you can see below but without much success.
For #2, it's simple to throw the data into an np.log10() but then the axis ticks are also log transformed. I feel like there should be a simple way to do this log display (seems like a pretty normal thing to do)?
Here's my code so far:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import random
# create fake data: n (time, accuracy) pairs per algorithm
n = 5
data = {}
# zip is wrapped in list() because the pairs are read twice below and a
# Python 3 zip object can only be iterated once.
data['A'] = list(zip(np.random.uniform(0, 10000, size=n), np.random.uniform(0, 0.6, size=n)))
data['B'] = list(zip(np.random.uniform(0, 200, size=n), np.random.uniform(0, 0.6, size=n)))
data['C'] = list(zip(np.random.uniform(0, 5000, size=n), np.random.uniform(0, 0.6, size=n)))

# make graph
markers = ['+', '*', 'x']
colors = ['b', 'r', 'g']
fig = plt.figure()
ax1 = fig.add_subplot(111)
plots = []
labels = []

# extract data: one scatter series per algorithm, each with its own
# colour and marker
for algorithm, color, marker in zip(['A', 'B', 'C'], colors, markers):
    results = data[algorithm]
    # Every record is kept.  The former `if x > 0.0` filter compared a whole
    # (time, accuracy) tuple with a float, which is always true on Python 2
    # and a TypeError on Python 3.
    testing = np.array([float(accuracy) for _, accuracy in results])
    ts = np.array([int(seconds) for seconds, _ in results])
    plot = ax1.scatter(ts, testing, color=color, marker=marker, s=10)
    plots.append(plot)
    labels.append(algorithm)

# set axis and title
ax1.legend(plots, labels, loc='lower right')
ax1.set_xlabel("Time (sec)")
ax1.set_ylabel("Testing Accuracy")
ax1.set_title("Time versus testing accuracy")

# set axis limits: extend the x range by half a tick interval beyond the
# first and last automatic tick, then restore the original tick positions
xticks, xticklabels = plt.xticks()
xmin = (3*xticks[0] - xticks[1])/2.
xmax = (3*xticks[-1] - xticks[-2])/2.
plt.xlim(xmin, xmax)
plt.xticks(xticks)
plt.ylim(0.0, 1.0)

# save to disk
plt.savefig("scatter.eps")
Why not do this?
# Leave some room below 0 and above 1 ...
ax1.set_ylim(-.1, 1.1)
# ... but only put ticks on [0, 1].  Eleven points give round steps of 0.1
# (ten points would be spaced 1/9 apart).
ax1.set_yticks(np.linspace(0, 1, 11))
# Logarithmic x axis: the data are displayed on a log scale while the tick
# labels keep the original values.
ax1.set_xscale('log')