Matplotlib How to calculate the bottom y value of a scatter dot - python

Here is the code:
import itertools
import pandas as pd
import matplotlib.pyplot as plt
# reuse these colors
colors = itertools.cycle(["r", "b", "g"])
# some random data
df = pd.DataFrame({'x':[1,2,3,4,5],
'y':[2,4,5,2,4],
'area': [100, 200, 400, 500, 800],
'label': ['blah1','blah2','blah3','blah4','blah5']
})
# draw a scatter plot
def draw_scatter_plot(
    x,
    y,
    marker_size,
    marker_color: itertools.cycle,
    labels
):
    """Draw one scatter marker per point and annotate it with its label.

    x, y         -- sequences of point coordinates, indexed in parallel
    marker_size  -- per-point marker sizes passed to ``scatter`` as ``s``;
                    when empty/None no points are drawn at all
    marker_color -- iterator from which one color is pulled per point
    labels       -- per-point label texts

    The label is anchored at the point itself (ha="center", va="top"),
    which is why it overlaps the lower half of the marker.
    NOTE(review): ``cur_font`` is not defined in this snippet; it is
    presumably a font-properties object created elsewhere.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    if marker_size:
        for idx in range(len(x)):
            point = (x[idx], y[idx])
            ax.scatter(*point, color=next(marker_color), s=marker_size[idx])
            ax.annotate(
                labels[idx],
                point,  # adjust the y coordinate here
                fontproperties=cur_font,
                fontsize=14,
                ha="center",
                va="top",
            )
    plt.show()
draw_scatter_plot(df.x.tolist(),
df.y.tolist(),
df.area.tolist(),
colors,
df.label.tolist())
Here is the result:
As you can see the labels overlap with the bottom of the circle. How can I calculate the bottom y value of the circle so that I can always position the labels such that they do not overlap with the circles?

The idea is to shift the text down by half the diameter of the scatter marker, np.sqrt(marker_size[i])/2 (the scatter size s is the marker area in points squared, so its square root is the diameter in points). You can then add an additional 2 points so that the text does not touch the marker.
ax.annotate(labels[i],
xy=(x[i], y[i]),
xytext=(0, -np.sqrt(marker_size[i])/2. - 2),
textcoords="offset points",
ha="center",
va="top")

One solution is to use textcoords as shown in this answer. You can turn off the fancy boxes in the original answer.
ax.annotate(labels[i],(x[i], y[i]), xytext=(0, -15),
textcoords='offset points', fontsize=14,ha="center",va="top",)

You can change your annotation as follows:
[plt.annotate("{}, {}".format(x,y), xy=(x,y), xytext=(x-.1,y-.16)) for x, y in zip(x, y)]
Using this, you have to scale your xytext coordinates depending on the scaling of your x and y axes.
One more comment: because you are using it in a while loop, you might want to change this to:
plt.annotate("{}, {}".format(x[i],y[i]), xy=(x[i],y[i]), xytext=(x[i]-.1,y[i]-.16))

Related

Scatterplot with hollow and filled points with matplotlib

I would like to reproduce the scatterplot below. Here is the code I have so far, but I cannot seem to get the points similar to the seed terms to be the same color as the filled points (seed terms). Any help is appreciated.
Also, I cannot figure out why the first word is the color white, even though I used a specific palette?
import pandas as pd
import numpy as np
import matplotlib
seed_terms = ['clean', 'recovery', 'spiral', 'tolerance', 'program']
embeddings_ex = np.random.rand(5, 10, 2)
embeddings_ex = np.array(embeddings_ex)
words_ex = [['quit', 'finally', 'pill', 'vomit', 'survive' ,'lil', 'chance' ,'chain', 'zero',
'quickly'],
['bullshit' ,'unrelated', 'everywhere', 'appear' ,'probably' ,'deal',
'mistake', 'window', 'comment', 'honest'],
['majority' ,'familiar', 'queer', 'edgy', 'skin', 'withdrawl' ,'sad', 'develop',
'perfectly', 'daughter'],
['snort', 'cheap', 'brain', 'teach' ,'shoot' ,'inject' ,'freak', 'type', 'black',
'absolute'],
['substitution', 'suboxone', 'country' ,'clinic', 'nerve', 'representation',
'2', 'website' ,'youtuber', 'insane']]
words_ex = np.array(words_ex)
fig, ax = plt.subplots(figsize=(16, 9))
sc = embeddings_ex[:, :, 0].flatten()
sw = embeddings_ex[:, :, 1].flatten()
plt.scatter(sc, sw, s=45, marker='o', alpha=0.2, color="none", edgecolors='k')
# annotate(ax, sc, sw, words, size=11)
# fill points that are seed words and make font bold
# Okabe and Ito color palette
colors = ['#FA4D4D', '#FBC93D', '#E37E3B', '#C13BE3', '#4B42FD', '#D55E00', '#CC79A7']
for i, word in enumerate(seed_terms):
plt.scatter(sc[i], sw[i], marker='o', alpha=.9,
color=colors[i], edgecolors='none', s = 100)
plt.annotate(word, alpha=.5, xy=(sc[i], sw[i]), xytext=(
5, 2), textcoords='offset points', ha='right', va='bottom', size=11)
# annotate similar words
for word in seed_terms:
# get the index of the seed word in the list of seed words
idx = seed_terms.index(word)
# get the x and y coordinates of the seed word
x = embeddings_ex[idx, :, 0].flatten()
y = embeddings_ex[idx, :, 1].flatten()
# get the list of similar words
similar_words = words_ex[idx]
# add annotations with smaller font
annotate(ax, x, y, similar_words, size=6)
# legend
plt.legend(seed_terms, loc=4)
plt.grid(False)
# remove axes and frame
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['bottom'].set_visible(False)
plt.gca().spines['left'].set_visible(False)
# ticks
plt.tick_params(axis='both', which='both', bottom=False,
left=False, labelbottom=False, labelleft=False)
The words related to each seed term are stored as five arrays in a list, so a loop is needed to draw the scatter points and annotations for each group. The thing to be careful about with this approach is the order in which the scatter points and annotations are drawn: first draw the gray scatter plot of all points, then the scatter points and annotations for the related words, and finally the scatter points and annotations for the seed terms. The reason is that everything is drawn from the same coordinate data, so if the filled markers were drawn first they would be covered by the hollow markers drawn afterwards. The attached image also avoids overlapping annotations, which I assume cannot be achieved with matplotlib alone, so perhaps some other tool was used for that.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Fix the RNG seed so the random example embeddings are reproducible.
np.random.seed(20230115)

seed_terms = ['clean', 'recovery', 'spiral', 'tolerance', 'program']
embeddings_ex = np.array(np.random.rand(5, 10, 2))
words_ex = np.array([
    ['quit', 'finally', 'pill', 'vomit', 'survive', 'lil', 'chance', 'chain', 'zero', 'quickly'],
    ['bullshit', 'unrelated', 'everywhere', 'appear', 'probably', 'deal', 'mistake', 'window', 'comment', 'honest'],
    ['majority', 'familiar', 'queer', 'edgy', 'skin', 'withdrawl', 'sad', 'develop', 'perfectly', 'daughter'],
    ['snort', 'cheap', 'brain', 'teach', 'shoot', 'inject', 'freak', 'type', 'black', 'absolute'],
    ['substitution', 'suboxone', 'country', 'clinic', 'nerve', 'representation', '2', 'website', 'youtuber', 'insane'],
])

fig, ax = plt.subplots(figsize=(16, 9))

# x / y coordinates of every point, flattened across the five groups.
sc = embeddings_ex[:, :, 0].flatten()
sw = embeddings_ex[:, :, 1].flatten()

# Layer 1: faint hollow markers for all points.
plt.scatter(sc, sw, s=45, marker='o', alpha=0.2, color="none", edgecolors='k')

# Palette; group k uses colors[k], so only the first len(seed_terms) are used.
colors = ['#FA4D4D', '#FBC93D', '#E37E3B', '#C13BE3', '#4B42FD', '#D55E00', '#CC79A7']

# Layer 2: the words of each group as white-filled markers outlined in the
# group color, each with a small-font annotation.
for group_idx in range(len(seed_terms)):
    outline = colors[group_idx]
    group_x = embeddings_ex[group_idx, :, 0].flatten()
    group_y = embeddings_ex[group_idx, :, 1].flatten()
    for text, px, py in zip(words_ex[group_idx], group_x, group_y):
        plt.scatter(px, py, s=45, marker='o', color='white', edgecolors=outline)
        plt.annotate(text, xy=(px, py), xytext=(5, 2), textcoords='offset points',
                     ha='right', va='bottom', size=6)

# Layer 3 (drawn last so it stays on top): filled markers and larger labels
# for the seed terms, placed at the first len(seed_terms) flattened points.
for term, fill, px, py in zip(seed_terms, colors, sc, sw):
    plt.scatter(px, py, marker='o', alpha=.9, color=fill, edgecolors='none', s=100, label=term)
    plt.annotate(term, alpha=.5, xy=(px, py), xytext=(5, 2), textcoords='offset points',
                 ha='right', va='bottom', size=11)

# Legend entries come from the label= given to the seed-term markers.
plt.legend(loc=4)
plt.grid(False)

# Hide the frame and every tick / tick label.
plt.gca().spines[:].set_visible(False)
plt.tick_params(axis='both', which='both', bottom=False,
                left=False, labelbottom=False, labelleft=False)
plt.show()

Add Datalabel on the end of barchart (lollypop)

I'm totally new at using Python for Power BI (or anything really).
I would like to add the value of the bar/scatter at the end of the line. (the datalabel)
Also to have a version where I could have the label inside of the scatter bubble would be cool.
Anyone who could help out here ?
All help appreciated
# libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create a dataframe
df = pd.DataFrame({'group': dataset.Genre , 'values': dataset.Revenue})
val = list(dataset.SelectedGenre)
# Reorder it following the values:
ordered_df = df.sort_values(by='values')
my_range=range(1,len(df.index)+1)
# Create a color if the group is "B"
my_color=np.where(ordered_df ['group']== val, 'orange', 'skyblue')
my_size=np.where(ordered_df ['group']== val , 150, 150)
# The vertival plot is made using the hline function
# I load the seaborn library only to benefit the nice looking feature
import seaborn as sns
val = ordered_df['values']
plt.hlines(y=my_range, xmin=0, xmax=val, color=my_color, alpha=1 , linewidth=8)
plt.scatter(val, my_range, color=my_color, s=my_size, alpha=1)
# Add title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("What about the B group?", loc='left')
plt.xlabel('Value of the variable')
plt.ylabel('Group')
plt.box(False) #Turn of Black bx around visual
plt.show()
Found it myself
import matplotlib.pyplot as plt
import numpy as np
# Data
x = dataset.Revenue
y = dataset.Genre
labels = dataset.Revenue
val = list(dataset.SelectedGenre)
# Create the figure and axes objects
fig, ax = plt.subplots(1, figsize=(10, 6))
fig.suptitle('Example Of Labelled Scatterpoints')
my_color=np.where(y == val, 'orange', 'skyblue')
my_size=np.where( y == val , 2000, 2000)
# Plot the scatter points
ax.scatter(x, y,
color= my_color, # Color of the dots
s=1000, # Size of the dots
alpha=1, # Alpha of the dots
linewidths=1) # Size of edge around the dots
ax.hlines(y, xmin=0, xmax=x, color= my_color, alpha=1 , linewidth=8)
def human_format(num):
    """Format a number compactly with a thousands suffix (K, M, G, T, P).

    The value is divided by 1000 until its magnitude drops below 1000 or the
    largest available suffix is reached, then rounded to a whole number.

    :param num: int or float to format (may be negative).
    :return: str such as ``'999'``, ``'2K'`` or ``'3M'``. Values too large
        for the last suffix keep that suffix with a longer number
        (e.g. ``'1000000P'``) instead of raising ``IndexError``.
    """
    # add more suffixes if you need them
    suffixes = ['', 'K', 'M', 'G', 'T', 'P']
    magnitude = 0
    # Stop at the last suffix so very large inputs cannot index past the list.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000
    return '%.0f%s' % (round(num), suffixes[magnitude])
# Write the formatted value (human_format of each label) as text on every point
for x_pos, y_pos, label in zip(x, y, labels):
ax.annotate(
human_format(label), # The label text for this point
xy=(x_pos, y_pos), # Position of the corresponding point
xytext=(-8, 0), # Start the text 8 points to the left of the point
textcoords='offset points', # Interpret xytext as an offset in points from xy
ha='left', # Horizontally aligned to the left
va='center', # Vertical alignment is centered
color = 'white') # White text
plt.box(False) # Turn off the black box around the visual
# Show the plot
plt.show()

Python : How to create a 2D density map/heat map

I'm coding with python.
I have 3 arrays x, y and z, and I would like to make a 2D density map of the z values in the (x, y) plane, with a colorbar.
So in my plot, the color at the point x[0] and y[0] would be determined by the value of z[0], the color at the point x[1] and y[1] would be determined by the value of z[1], etc.
Does anyone know how to do this ?
Thank you
Check out https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html
For different colormaps: https://matplotlib.org/tutorials/colors/colormaps.html
A sample piece of code for your need will be something like this
#--------------------------Plotting starts here---------------------------------#
fig, ax0 = plt.subplots()
im0 = plt.scatter(x,y,s=1,c=z, cmap='bwr')
#------------------if you want to use pcolormesh-------------------
#----------and have Z values stored as a numpy array Data---------------------#
#X,Y = np.meshgrid(x,y)
#im0 = ax0.pcolormesh(X,Y,Data, cmap="YourFavouriteColormap")
cbar = fig.colorbar(im0,ax=ax0)
ax0.set_title("Your title")
plt.xlabel("xlabel")
plt.ylabel("ylabel")
filename = "prefix" + "."+ "fileformat"
plt.savefig(filename)
Edit 1:
From one of your comments, if you have grid data, you can try pcolormesh and try shading, an optional argument for interpolation.
shading{'flat', 'gouraud'}, optional
The fill style, Possible values:
'flat': A solid color is used for each quad. The color of the quad (i, j), (i+1, j), (i, j+1), (i+1, j+1) is given by C[i, j].
'gouraud': Each quad will be Gouraud shaded: The color of the corners (i', j') are given by C[i',j']. The color values of the area in between is interpolated from the corner values. When Gouraud shading is used, edgecolors is ignored.
You can use matplotlib's scatter plots with legends and grid where the size of each circle can be referred to z values. This is an example I got from here:
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
scatter = ax.scatter(volume, amount, c=ranking, s=0.3*(price*3)**2,
vmin=-3, vmax=3, cmap="Spectral")
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
func=lambda s: np.sqrt(s/.3)/3)
legend2 = ax.legend(*scatter.legend_elements(**kw),
loc="lower right", title="Price")
plt.show()
Output:
In response to your comment AshlinJP :
Either way I still got the error message : "imshow() got multiple values for keyword argument 'cmap'"
I don't know if it has any importance but I use python 2.7
Actually my code is :
import numpy as np
import matplotlib.pyplot as plt
x,y,z = np.loadtxt('gamma.txt', unpack = True)
fig, ax0 = plt.subplots()
cmap = plt.get_cmap('viridis')
im0 = ax0.imshow(x,y,z, cmap=cmap, interpolation="gaussian")
cbar = fig.colorbar(im0,ax=ax0)
ax0.set_title("Your title")
plt.xlabel("xlabel")
plt.ylabel("ylabel")

How do I offset lines in matplotlib by X points

I'm using matplotlib to plot some data that I wish to annotate with arrows (distance markers). These arrows should be offset by several points so as not to overlap with the plotted data:
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
fig, ax = plt.subplots()
x = [0, 1]
y = [0, 0]
# Plot horizontal line
ax.plot(x, y)
dy = 5/72
offset = transforms.ScaledTranslation(0, dy, ax.get_figure().dpi_scale_trans)
verttrans = ax.transData+offset
# Plot horizontal line 5 points above (works!)
ax.plot(x, y, transform = verttrans)
# Draw arrow 5 points above line (doesn't work--not vertically translated)
ax.annotate("", (0,0), (1,0),
size = 10,
transform=verttrans,
arrowprops = dict(arrowstyle = '<|-|>'))
plt.show()
Is there any way to make lines drawn by ax.annotate() be offset by X points? I wish to use absolute coordinates (e.g., points or inches) instead of data coordinates because the axis limits are prone to changing.
Thanks!
The following code does what I desired. It uses ax.transData and figure.get_dpi():
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
fig, ax = plt.subplots()
x = [0, 1]
y = [0, 0]
ax.plot(x, y)
dy = 5/72
# Convert the desired offset dy (given in inches) into data units along one axis.
i = 1 # axis index: 1 = y (use 0 for an x offset, dx)
tmp = ax.transData.transform([(0,0), (1,1)]) # data points (0,0) and (1,1) in display (pixel) coords
tmp = tmp[1,i] - tmp[0,i] # pixels spanned by 1 data unit along the chosen axis
tmp = 1/tmp # data units per pixel
tmp = tmp*dy*ax.get_figure().get_dpi() # dy inches * dpi = pixels; times data units per pixel = offset in data units
# NOTE(review): this uses the transData in effect right now, so the offset is
# presumably no longer correct if the axis limits or figure size change later.
ax.plot(x, y)
ax.annotate("", [0,tmp], [1,tmp],
size = 10,
arrowprops = dict(arrowstyle = '<|-|>'))
plt.show()
What's your expected output? If you're just looking to move the arrow you're drawing vertically, the API for annotate is
annotate(s, xy, xytext=None, ...)
so you can draw something like
ax.annotate("", (0,0.01), (1,0.01),
size = 10,
arrowprops = dict(arrowstyle = '<|-|>'))
which is moved up by 0.01 in data coordinates in the y direction. You can also specify coordinates as a fraction of the total figure size in annotate (see doc). Is that what you wanted?

Matplotlib - label each bin

I'm currently using Matplotlib to create a histogram:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as pyplot
...
fig = pyplot.figure()
ax = fig.add_subplot(1,1,1,)
n, bins, patches = ax.hist(measurements, bins=50, range=(graph_minimum, graph_maximum), histtype='bar')
#ax.set_xticklabels([n], rotation='vertical')
for patch in patches:
patch.set_facecolor('r')
pyplot.title('Spam and Ham')
pyplot.xlabel('Time (in seconds)')
pyplot.ylabel('Bits of Ham')
pyplot.savefig(output_filename)
I'd like to make the x-axis labels a bit more meaningful.
Firstly, the x-axis ticks here seem to be limited to five ticks. No matter what I do, I can't seem to change this - even if I add more xticklabels, it only uses the first five. I'm not sure how Matplotlib calculates this, but I assume it's auto-calculated from the range/data?
Is there some way I can increase the resolution of x-tick labels - even to the point of one for each bar/bin?
(Ideally, I'd also like the seconds to be reformatted in micro-seconds/milli-seconds, but that's a question for another day).
Secondly, I'd like each individual bar labeled - with the actual number in that bin, as well as the percentage of the total of all bins.
The final output might look something like this:
Is something like that possible with Matplotlib?
Cheers,
Victor
Sure! To set the ticks, just, well... Set the ticks (see matplotlib.pyplot.xticks or ax.set_xticks). (Also, you don't need to manually set the facecolor of the patches. You can just pass in a keyword argument.)
For the rest, you'll need to do some slightly more fancy things with the labeling, but matplotlib makes it fairly easy.
As an example:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter
data = np.random.randn(82)
fig, ax = plt.subplots()
counts, bins, patches = ax.hist(data, facecolor='yellow', edgecolor='gray')
# Set the ticks to be at the edges of the bins.
ax.set_xticks(bins)
# Set the xaxis's tick labels to be formatted with 1 decimal place...
ax.xaxis.set_major_formatter(FormatStrFormatter('%0.1f'))
# Change the colors of bars at the edges...
twentyfifth, seventyfifth = np.percentile(data, [25, 75])
for patch, rightside, leftside in zip(patches, bins[1:], bins[:-1]):
if rightside < twentyfifth:
patch.set_facecolor('green')
elif leftside > seventyfifth:
patch.set_facecolor('red')
# Label the raw counts and the percentages below the x-axis...
bin_centers = 0.5 * np.diff(bins) + bins[:-1]
for count, x in zip(counts, bin_centers):
# Label the raw counts
ax.annotate(str(count), xy=(x, 0), xycoords=('data', 'axes fraction'),
xytext=(0, -18), textcoords='offset points', va='top', ha='center')
# Label the percentages
percent = '%0.0f%%' % (100 * float(count) / counts.sum())
ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'),
xytext=(0, -32), textcoords='offset points', va='top', ha='center')
# Give ourselves some more room at the bottom of the plot
plt.subplots_adjust(bottom=0.15)
plt.show()
One thing I wanted to add to histograms plotted with "density=True" was the relative frequency of each bin. I searched but couldn't find a function that does that, so I wrote my own. The result is shown in the image:
The function:
def label_densityHist(ax, n, bins, x=4, y=0.01, r=2, **kwargs):
    """
    Add a label with the relative frequency of each bin to a density histogram.

    The relative frequency of a bin is computed as ``bin width * bin height``,
    which is only a fraction of the total when the histogram was drawn with
    ``density=True``.

    :param ax: matplotlib Axes
        The axes to draw the labels on (only ``ax.text`` is used).
    :param n: list, array of int, float
        The values (heights) of the histogram bins.
    :param bins: list, array of int, float
        The edges of the bins; needs at least ``len(n) + 1`` entries.
    :param x: int, float
        Divisor controlling the x position of the labels: each label is placed
        at ``left edge + bin width / x``, so a higher value moves it left.
        Default: 4
    :param y: int, float
        Relative vertical gap: each label is placed at ``height * (1 + y)``,
        so a higher value moves it up.
        Default: 0.01
    :param r: int
        Number of decimal places the relative frequency is rounded to.
        Default: 2
    :param **kwargs: Text properties in matplotlib, forwarded to ``ax.text``.
    :return: None

    Example::

        import matplotlib.pyplot as plt
        import numpy as np
        dados = np.random.randn(100)
        axe = plt.gca()
        n, bins, _ = axe.hist(x=dados, density=True, edgecolor='black')
        label_densityHist(axe, n, bins)
        plt.show()

    Example::

        import matplotlib.pyplot as plt
        import numpy as np
        dados = np.random.randn(100)
        axe = plt.gca()
        n, bins, _ = axe.hist(x=dados, density=True, edgecolor='black')
        label_densityHist(axe, n, bins, x=6, fontsize='large')
        plt.show()

    Reference:
    [1] https://matplotlib.org/3.1.1/api/text_api.html#matplotlib.text.Text
    """
    # Imported locally so the function does not rely on a bare `around`
    # having been imported by the surrounding code.
    import numpy as np

    widths = [bins[i + 1] - bins[i] for i in range(len(n))]
    # relative frequency of each bin (width * height), rounded to r decimals
    k = np.around([width * height for width, height in zip(widths, n)], r)
    # plot the label/text for each bin
    for left, width, height, freq in zip(bins, widths, n, k):
        x_pos = width / x + left
        y_pos = height + (height * y)
        # Forward text properties as keywords rather than as a positional dict.
        ax.text(x_pos, y_pos, str(freq), **kwargs)
To add SI prefixes to your axis labels you want to use QuantiPhy. In fact, in its documentation it has an example that shows how to do this exact thing: MatPlotLib Example.
I think you would add something like this to your code:
from matplotlib.ticker import FuncFormatter
from quantiphy import Quantity
time_fmtr = FuncFormatter(lambda v, p: Quantity(v, 's').render(prec=2))
ax.xaxis.set_major_formatter(time_fmtr)

Categories

Resources