Why is my grid offset in this example? - python

I am trying to draw a randomly occupied grid with matplotlib. The grid looks offset from the blocks by a random amount:
Here is the code:
import matplotlib.pyplot as plt
import numpy as np
# Make a 10x10 grid...
nrows, ncols = 10, 10
# Fill the cells randomly with 0s and 1s
image = np.random.randint(2, size=(nrows, ncols))

# Cell borders sit on the half-integers: -0.5 .. ncols - 0.5 along x and
# -0.5 .. nrows - 0.5 along y. Deriving them from nrows/ncols keeps the grid
# correct if the size above is changed.
x_min, x_max = -0.5, ncols - 0.5
y_min, y_max = -0.5, nrows - 0.5

# Make grid: every line is stored as an (x-coordinates, y-coordinates) pair.
# Vertical lines: one per column border, spanning the full height.
vgrid = [((i - 0.5, i - 0.5), (y_min, y_max)) for i in range(ncols + 1)]
# Horizontal lines: one per row border, spanning the full width.
hgrid = [((x_min, x_max), (i - 0.5, i - 0.5)) for i in range(nrows + 1)]

row_labels = range(nrows)
# One label per column (ten columns).
col_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'j']

plt.matshow(image, cmap='Greys')
for xs, ys in hgrid + vgrid:
    plt.plot(xs, ys, 'k-')
# Enforce the plot area.
plt.axis([x_min, x_max, y_min, y_max])
plt.xticks(range(ncols), col_labels)
plt.yticks(range(nrows), row_labels)
plt.show()
The problem seems to happen when I enforce a plot area; this line:
plt.axis([-0.5, 9.5, -0.5, 9.5])
Also, please feel free to suggest a better method. I am new to pyplot.

You can use plt.grid() to plot the axes grid. Unfortunately it won't solve the issue. The misalignment of the grid is a known issue for imshow (a function that is called by matshow).
I suggest playing with the figure size and the line width of the grid until you get something acceptable.
# Square figure; matshow below is pointed at figure number 1 via fignum=1.
plt.figure(figsize=(5, 5))
nrows, ncols = 10, 10
image = np.random.randint(2, size=(nrows, ncols))
row_labels = range(nrows)
col_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'j']
plt.matshow(image, cmap='Greys', fignum=1, interpolation="nearest")

# Major ticks carry the row/column labels.
plt.xticks(range(ncols), col_labels)
plt.yticks(range(nrows), row_labels)

# Minor ticks go half-way between neighbouring labels.
ax = plt.gca()
minor_x = [col + 0.5 for col in range(ncols - 1)]
minor_y = [row + 0.5 for row in range(nrows - 1)]
ax.set_xticks(minor_x, minor=True)
ax.set_yticks(minor_y, minor=True)

# Draw the grid on the minor ticks only.
plt.grid(which="minor", ls="-", lw=2)

This is known behavior because, by default, matshow() calls imshow() with the argument interpolation="nearest". You should get better results by overriding the argument manually:
plt.matshow(image, cmap='Greys', interpolation="none")

Related

How to generate proper legends for scatter plot in python

I am trying to prepare a box and scatter plot for 8 data points in python. I use the following code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Data: eight measurements plus a random vertical position near 1 for each.
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
x0 = np.random.normal(1, 0.05, len(x))
# One colour and one legend label per measurement.
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

f, ax = plt.subplots()
# Horizontal box plot with the mean shown and the fliers hidden.
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
# All points are drawn by a single scatter call.
ax.scatter(x, x0, c=c, s=60, alpha=0.2)
ax.legend(labels=lab, loc="upper left", ncol=8)
It generates an image like the following:
It looks like the legend doesn't have the proper sphere symbols with different colors, which I expected. Besides, the colors of the symbols are pale and light.
So how to generate proper legends with correct symbols and how to make the colors of the symbols brighter and sharper?
I will deeply appreciate it if anyone can help.
Best regards
To make the colours brighter, just raise the alpha value.
For the legend, the order of the plotting matters here: it is better to plot the boxplot after the scatter plots. Also, to give each point its own entry in the legend, each point should be treated as a separate plot; for that I used a loop over the values of x, x0 and c. Here's the outcome:
import numpy as np
import matplotlib.pyplot as plt
# init figure
f, ax = plt.subplots()
# values
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
x0 = np.random.normal(1, 0.05, len(x))
# labels and colours
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
# One scatter call per point, so every point is its own artist and can get
# its own legend entry.
plots = [
    ax.scatter(xi, yi, c=ci, s=60, alpha=0.5)  # raised the alpha to get sharper colors
    for xi, yi, ci in zip(x, x0, c)
]
# plot legends
# Handles and labels are both passed by keyword: mixing positional handles
# with labels=... is deprecated in matplotlib and may discard the handles.
ax.legend(handles=plots,
          labels=lab,
          scatterpoints=1,
          loc='upper left',
          ncol=8,
          fontsize=8)
# plot the box plot after the scatter points and the legend
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
# save the desired figure
plt.savefig('tt.png')
Output:

Circlify - change the colour of just one of the circles

I have this code:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({
    'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
    'Value': [10, 2, 23, 87, 12, 65]
})

# Compute one circle per value, packed into a circle of radius 1 at the origin.
values = df['Value'].tolist()
enclosure = circlify.Circle(x=0, y=0, r=1)
circles = circlify.circlify(
    values,
    show_enclosure=False,
    target_enclosure=enclosure,
)

# Single figure with a single subplot, titled and without visible axes.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Basic circular packing')
ax.axis('off')

# Axis limits: the furthest any circle reaches from the origin along x or y.
extents = []
for circle in circles:
    extents.append(max(abs(circle.x) + circle.r, abs(circle.y) + circle.r))
lim = max(extents)
plt.xlim(-lim, lim)
plt.ylim(-lim, lim)

# Draw every circle and put its label at the circle's centre.
labels = df['Name']
for circle, label in zip(circles, labels):
    x, y, r = circle
    patch = plt.Circle((x, y), r, alpha=0.2, linewidth=2, color='#e6d4ff')
    ax.add_patch(patch)
    plt.annotate(label, (x, y), va='center', ha='center', size=12)
It produces this output:
I wanted to change the colour of just one of the circles (for example, the biggest circle).
I tried changing the colour from:
color='#e6d4ff'
to, for example, a list of colours:
color=['#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#ffc4c4']
with the error:
RGBA sequence should have length 3 or 4
I guess the error is saying if I'm providing a list, then the list should just be RGB dimensions.
Would someone be able to show me? (I couldn't see it in the python graph gallery e.g. [here][2] or the circlify doc here but maybe I've missed it?)
In each call to plt.Circle(...) you're only creating one circle, which has only one color. To assign different colors to different circles, the colors can be added into the for loop, e.g. : for circle, label, color in zip(circles, labels, colors):.
Note that circlify expects the list of values in sorted order, and that the returned list contains the circles sorted from smallest to largest. In your example code, D is the largest circle, but in your plot, you labeled it as F. Sorting the dataframe at the start and using that order helps to keep values and labels synchronized.
Here is the example code, having D as largest and with a different color (the code also changes a few plt. calls to ax. to be more consistent):
import matplotlib.pyplot as plt
import pandas as pd
import circlify
df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
                   'Value': [10, 2, 23, 87, 12, 65]})
# Sort once so values, labels and colours all follow the same order.
df = df.sort_values('Value')  # the order is now ['B', 'A', 'E', 'C', 'F', 'D']

circles = circlify.circlify(df['Value'].tolist(),
                            show_enclosure=False,
                            target_enclosure=circlify.Circle(x=0, y=0, r=1))

fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Basic circular packing')
ax.axis('off')
ax.set_aspect('equal')  # show circles as circles, not as ellipses

# Symmetric limits large enough to contain every circle.
lim = max(max(abs(c.x) + c.r, abs(c.y) + c.r) for c in circles)
ax.set_xlim(-lim, lim)
ax.set_ylim(-lim, lim)

labels = df['Name']  # ['B', 'A', 'E', 'C', 'F', 'D']
# Highlight colour for the row(s) holding the maximum value, default otherwise.
largest = df['Value'].max()
colors = ['#ffc4c4' if val == largest else '#e6d4ff' for val in df['Value']]

for (x, y, r), label, color in zip(circles, labels, colors):
    ax.add_patch(plt.Circle((x, y), r, alpha=0.7, linewidth=2, color=color))
    ax.annotate(label, (x, y), va='center', ha='center', size=12)
plt.show()

Adding Percentages to sns.countplot - how do I show percentages for two values within the categories?

Hi I'm trying to add percentages to my countplot with 5 categories and 2 values (old and younger). I've tried adding the def and loop from
How to add percentages on top of bars in seaborn?
My code:
plt.figure(figsize=(7, 5))
ax = sb.countplot(data=df_x_1, x='concern_virus', hue='age')

# Axis text: tick sizes, axis labels and title.
plt.xticks(size=12)
plt.yticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")

# Write a percentage at the top of every bar.
for bar in ax.patches:
    height = bar.get_height()
    right_edge = bar.get_x() + bar.get_width()
    # NOTE: `total` is not defined anywhere in this snippet.
    ax.annotate(f'{100 * height / total:.1f}%', (right_edge, height), ha='center')
plt.show()
As you can see, the percentages don't make sense.
The problem seems to be with the variable that is undefined in the above code: total. total should be the number you want to call 100%, for example the total number of rows in the dataframe. That way all the displayed percentages sum up to 100.
Here is some sample code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
N = 250
# Random sample data: a concern level and an age group for each of N rows.
concern_levels = ['a', 'b', 'c', 'd', 'e']
df_x_1 = pd.DataFrame({'concern_virus': np.random.choice(concern_levels, N),
                       'age': np.random.choice(['younger', 'older'], N)})
# The number of rows is what the percentages below treat as 100%.
total = len(df_x_1)

plt.figure(figsize=(7, 5))
ax = sns.countplot(data=df_x_1, x='concern_virus', order=concern_levels,
                   hue='age', hue_order=['younger', 'older'],
                   palette=['chartreuse', 'darkviolet'])

plt.xticks(size=12)
plt.yticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")

for bar in ax.patches:
    height = bar.get_height()
    centre = bar.get_x() + bar.get_width() / 2
    # The trailing newline lifts the centred text above the bar top.
    ax.annotate(f'{100 * height / total:.1f}%\n', (centre, height),
                ha='center', va='center')
plt.tight_layout()
plt.show()
To have the text in the center of the bar, it helps to choose ha='center' and add half the width to the x-position. Appending a newline to the text can help to position the text nicely on top of the bar. plt.tight_layout() can help to fit all the labels into the plot.
Seaborn lets you fix the order of the x-axis via order=.... The order of the legend elements and the corresponding colors can be set via hue_order=... and palette=....
PS: For the new question, with totals per age group, instead of directly looping through all the bars, a first loop can visit the groups:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
label_younger = 'younger'
label_older = 'older'
concern_levels = ['a', 'b', 'c', 'd', 'e']

# Two random samples of different size, one per age group, stacked into one frame.
df_younger = pd.DataFrame({'concern_virus': np.random.choice(concern_levels, 230)})
df_older = pd.DataFrame({'concern_virus': np.random.choice(concern_levels, 120)})
df_younger['age'] = label_younger
df_older['age'] = label_older
df_x_1 = pd.concat([df_younger, df_older], ignore_index=True)

plt.figure(figsize=(7, 5))
ax = sns.countplot(data=df_x_1, x='concern_virus', order=concern_levels,
                   hue='age', hue_order=[label_younger, label_older],
                   palette=['orangered', 'skyblue'])

plt.xticks(size=12)
plt.yticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")

# Visit the bars one container at a time so that each bar is divided by the
# size of its own group.
for bars in ax.containers:
    is_younger = bars.get_label() == label_younger
    group_total = len(df_younger) if is_younger else len(df_older)
    for bar in bars.patches:
        height = bar.get_height()
        centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{100 * height / group_total:.1f}%\n', (centre, height),
                    ha='center', va='center')
plt.tight_layout()
plt.show()

How can I get annotations (neatly) outside the convex hull?

I have developed a bit of code to automatically generate an equilateral n-dimensional polygon:
# Create equilateral n-dimensional polygon
def polygon(side, radius=1, rotation=0, translation=None):
    """Return the corner points of a regular polygon as an array.

    Corner ``i`` lies at the angle ``2 * pi * i / side + rotation`` (radians);
    its x coordinate is the sine and its y coordinate the cosine of that
    angle, each scaled by ``radius``. With ``rotation=0`` the first corner is
    therefore at ``(0, radius)``.

    Args:
        side: Number of corners.
        radius: Distance from the centre to every corner.
        rotation: Angle in radians added to every corner's angle.
        translation: Optional ``(dx, dy)`` offset added to every corner. May
            be a list, tuple or NumPy array. ``None`` or an empty sequence
            means no offset.

    Returns:
        ``numpy.ndarray`` of shape ``(side, 2)``, one ``(x, y)`` row per corner.

    Raises:
        ZeroDivisionError: If ``side`` is 0.
    """
    import math

    import numpy as np

    step = 2 * math.pi / side
    angles = step * np.arange(side) + rotation
    points = radius * np.column_stack((np.sin(angles), np.cos(angles)))

    if translation is None:
        return points
    # Test for None / emptiness explicitly: a bare truth test on a NumPy
    # array with more than one element raises ValueError.
    offset = np.asarray(translation, dtype=float)
    if offset.size:
        points = points + offset
    return points
Now, I want to put labels neatly to the outside corners of this n-dimensional polygon. In the following example I have created a hexagon with radius 10, centered around (3,3).
import matplotlib.pyplot as plt
pol = polygon(7, 10, 0, [3, 3])
hull = ConvexHull(pol)
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', "L", 'M',
          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
fig = plt.figure(figsize=(4, 4), dpi=100)

# Outline: one black segment per hull simplex.
for simplex in hull.simplices:
    plt.plot(pol[simplex, 0], pol[simplex, 1], 'k-')
# Corner markers: green squares.
plt.plot(pol[:, 0], pol[:, 1], 'gs', ms=10)

if labels is not None:
    # zip stops at the shorter input, so only the first len(pol) labels are used.
    for label, (px, py) in zip(labels, pol):
        # Every label gets the same offset: 8 points straight up from its corner.
        plt.annotate(label, xy=(px, py), xytext=(0, 8),
                     textcoords='offset points', ha="center", va="bottom")
plt.axis('off')
plt.show()
Unfortunately, as the figure shows, only point A, B, and F lay neatly outside the hexagon. Is there a systematic way to annotate the labels to the outside corner of the polygon (hexagon in this case), no matter the dimension n? Thanks in advance!
Plot of hexagon with wrongly placed annotations
First, let's look at the special case of a n-dimensional regular polygon.
For this, you can just put the annotations on the vertices of a slightly larger polygon (I used 1.2 times the original radius).
Below is the full code and result.
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull
r = 10  # radius
center = [3, 3]
pol = polygon(7, r, 0, center)
# Same polygon with a 1.2 times larger radius; its corners are the label positions.
pol2 = polygon(7, 1.2 * r, 0, center)  # for annotations
hull = ConvexHull(pol)
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', "L", 'M',
          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
fig = plt.figure(figsize=(4, 4), dpi=100)

# Outline and corner markers of the original polygon.
for simplex in hull.simplices:
    plt.plot(pol[simplex, 0], pol[simplex, 1], 'k-')
plt.plot(pol[:, 0], pol[:, 1], 'gs', ms=10)

if labels is not None:
    # One label per corner, centred on the matching corner of the larger polygon.
    for label, (lx, ly) in zip(labels, pol2):
        plt.annotate(label, xy=(lx, ly), xytext=(0, 0),
                     textcoords='offset points', ha="center", va="center")

# Limits of 1.5 radii around the centre leave room for the labels.
half_span = 1.5 * r
plt.xlim(center[0] - half_span, center[0] + half_span)
plt.ylim(center[1] - half_span, center[1] + half_span)
plt.axis('off')
plt.show()
Now, let's look at a general convex hull. An easy solution would be the following:
For each simplex S, calculate the mid point M of its neighbouring two simplices (called N_1 and N_2). We know this midpoint must be in the interior of the convex hull.
# hull.neighbors is an array attribute, so it is indexed rather than called.
# NOTE(review): SciPy documents hull.neighbors as the neighbouring facets of
# each facet; confirm that these indices are meant to index pol.
(N_1, N_2) = hull.neighbors[S]
M = (pol[N_1] + pol[N_2]) / 2
Draw the line from M to S, and take the new point M_ext on that line such that S is equidistant from M and M_ext, with M_ext lying on the other side of S. We know that M_ext is definitely outside the convex hull in that case.
M_ext = pol[S] + (pol[S] - M)
You could potentially normalize it, so that the annotations are the same distance to the simplex (e.g. using numpy.linalg.norm). In my code I also multiplied by a constant factor, so that the text does not overlap with the vertices.
M_ext = pol[S] + (pol[S] - M) / np.linalg.norm(pol[S]-M)
Again full code & result below:
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import ConvexHull
r = 10  # radius
center = [3, 3]
pol = polygon(7, r, 0, center)
hull = ConvexHull(pol)
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', "L", 'M',
          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
fig = plt.figure(figsize=(4, 4), dpi=100)
for simplex in hull.simplices:
    plt.plot(pol[simplex, 0], pol[simplex, 1], 'k-')
plt.plot(pol[:, 0], pol[:, 1], 'gs', ms=10)
if labels is not None:
    # hull.neighbors holds facet indices, not point indices, so the two
    # corners next to each hull corner are taken from hull.vertices instead,
    # which lists the hull corners in counter-clockwise order for 2-D input.
    # Points that are not hull corners get no label.
    corners = hull.vertices
    n_corners = len(corners)
    for k, S in enumerate(corners):
        if S >= len(labels):
            continue  # no label available for this point index
        N_1 = corners[k - 1]
        N_2 = corners[(k + 1) % n_corners]
        # M is the midpoint of the two adjacent corners; the direction from M
        # through S points out of the hull.
        M = (pol[N_1] + pol[N_2]) / 2
        outward = pol[S] - M
        # Normalise so every label sits the same distance (0.2 * r) from its corner.
        M_ext = pol[S] + outward / np.linalg.norm(outward) * 0.2 * r
        plt.annotate(labels[S], xy=M_ext, xytext=(0, 0),
                     textcoords='offset points', ha="center", va="center")
plt.xlim(center[0] - 1.5 * r, center[0] + 1.5 * r)
plt.ylim(center[1] - 1.5 * r, center[1] + 1.5 * r)
plt.axis('off')
plt.show()

Adjusting opacity and adding annotation and text to matplotlib figure

I want to create some publication-quality figures. I want to add text and annotate my points as in my attempt below. I have an issue with my actual figures in .eps format. When I zoom in, the plot points are still visible despite "s = 0.00001", and the opacity of the line appears to change from part to part; it looks choppy and not as stylish as I would like it to be. The line also overlaps with the text and points; sometimes it's too messy and hard to read. Any other ideas to breathe some style and color into my figures would be appreciated as well.
import matplotlib.pyplot as plt
from scipy import stats
x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
y = [1, 2, 3, 4, 5, 6, 7, 8, 9]
n = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
# Least-squares line through the points, plus its r and p values.
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
fig, ax = plt.subplots()
plt.scatter(x, y, marker='o', color='k', s=0.00001)
# Put each point's name at the point itself.
for txt, xi, yi in zip(n, x, y):
    ax.annotate(txt, (xi, yi))
# A separate loop name keeps the comprehension from reusing the name x.
predict_y = [intercept + slope * xi for xi in x]
# The keyword is spelled in lowercase: current matplotlib no longer accepts
# mixed-case property names such as LineWidth.
plt.plot(x, predict_y, 'k-', alpha=0.4, linewidth=0.3)
plt.xlabel('Number 1')
plt.ylabel('Number 2')
plt.figtext(.73, .84, u"R²: %0.2f " % r_value**2)
plt.figtext(.73, .79, u"P-value: %0.3f " % p_value)
plt.savefig('test.eps', format='eps', dpi=1000)
plt.show()
It's weird: if you don't want the points to show, why bother using scatter()? The plot generated on my computer looks fine, and the opacity is not supposed to change according to your code; if it does, I wonder whether your screen is clean... To deal with the overlaps, I suggest moving the texts a little bit.
I made several changes, trying to make it look better and add some colors (but I'm not sure if that's the 'stylish' you mean):
import matplotlib.pyplot as plt
from scipy import stats
import numpy.random as npran
x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
y = [1, 2, 3, 4, 5, 6, 7, 8, 9]
n = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
col = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

fig, ax = plt.subplots()
plt.scatter(x, y, marker='o', color='k', s=1)  # make the points more obvious

for xi, yi, txt in zip(x, y, n):
    # Random RGB triple per label; npran.choice(col) would pick a named colour instead.
    rgb = [npran.rand() for _ in range(3)]
    # Shift the text slightly left and up so it does not sit on the point.
    ax.annotate(txt, (xi - 0.2, yi + 0.3), fontsize=15, color=rgb)

predict_y = [intercept + slope * xi for xi in x]
plt.plot(x, predict_y, 'k-', alpha=0.3, linewidth=0.75)  # wider line

plt.xlabel('Number 1', fontsize=20)  # larger font
plt.ylabel('Number 2', fontsize=20)
plt.grid(color='r')  # add a grid

# Both statistics go into one text block near the lower right of the figure.
# The box is fully transparent (alpha 0); to make it visible use e.g.
# bbox={'facecolor':'white', 'alpha':1, 'pad':10}.
stats_text = "$R^2: {:.2f}$\n$P-value: {:.3f}$".format(r_value**2, p_value)
plt.figtext(.65, .15, stats_text,
            bbox={'facecolor': 'red', 'alpha': 0, 'pad': 10}, fontsize=15)
plt.show()

Categories

Resources