Adjusting opacity and adding annotation and text to matplotlib figure - python

I want to create some publication quality figures. I want to add text and annotate my points as in my attempt below. I have an issue with my actual figures in .eps format. When I zoom in, the plot points are still visible despite "s = 0.00001" and the opacity of the line appears to change from part to part, looks choppy and not as stylish as I would like it to be. The line also overlaps wit hthe text and points, sometimes it's too messy and hard to read. Any other ideas to breathe some style, and color into my figures would be appreciated as well.
import matplotlib.pyplot as plt
from scipy import stats
x = [1,2,3,4,5,6,7,8,9]
y = [1,2,3,4,5,6,7,8,9]
n = ['A', 'B', 'C', 'D', 'E' , 'F', 'G', 'H', 'I']
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
fig, ax = plt.subplots()
plt.scatter(x, y, marker='o', color = 'k', s = 0.00001)
for i, txt in enumerate(n):
ax.annotate(txt, (x[i],y[i]))
predict_y = [(intercept + (slope * x)) for x in x]
plt.plot(x, predict_y,'k-', alpha=0.4, LineWidth=0.3)
plt.xlabel('Number 1')
plt.ylabel('Number 2')
plt.figtext(.73, .84, u"R²: %0.2f " % r_value**2)
plt.figtext(.73, .79, u"P-value: %0.3f " % p_value)
plt.savefig('test.eps', format = 'eps', dpi=1000)
plt.show()

It's wired, if you don't want points to show, why bother using scatter()? The plot generated in my computer proves fine, and opacity are not supposed to change according to your code, if it does, I doubt if you screen is clean... And to deal with overlaps, I suggest move the texts a little bit.
I made several changes, trying to make it looks better and add some colors(but I'm not sure if that's the 'stylish' you mean):
import matplotlib.pyplot as plt
from scipy import stats
import numpy.random as npran
x = [1,2,3,4,5,6,7,8,9]
y = [1,2,3,4,5,6,7,8,9]
n = ['A', 'B', 'C', 'D', 'E' , 'F', 'G', 'H', 'I']
col = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
fig, ax = plt.subplots()
plt.scatter(x, y, marker='o', color = 'k', s = 1) #make points more obvious
for i, txt in enumerate(n):
#shift texts little
ax.annotate(txt, (x[i]-0.2, y[i]+0.3), fontsize = 15, \
color = [npran.rand() for i in range(3)])#make it colorfull?
#or use color = npran.choice(col)
predict_y = [(intercept + (slope * x)) for x in x]
plt.plot(x, predict_y, 'k-', alpha=0.3, linewidth=0.75) #wider line
plt.xlabel('Number 1', fontsize = 20) #Larger font
plt.ylabel('Number 2', fontsize = 20)
plt.grid(color = 'r') #add grids
# make the texts together, and move it to right-down
plt.figtext(.65, .15, "$R^2: {:.2f}$\n$P-value: {:.3f}$".format(r_value**2, p_value), \
bbox={'facecolor':'red', 'alpha':0, 'pad':10}, fontsize = 15)
#you can change to have the boundry box visible
#like 'bbox={'facecolor':'white', 'alpha':1, 'pad':10}'
plt.show()

Related

How to generate proper legends for scatter plot in python

I am trying to prepare a box and scatter plot for 8 data points in python. I use the following code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
f, ax = plt.subplots()
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
x0 = np.random.normal(1, 0.05, len(x))
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
ax.scatter(x, x0, c=c, s=60, alpha=0.2)
ax.legend(labels=lab, loc="upper left", ncol=8)
It generate a image like the following:
It looks that the legend doesn't have the proper sphere symbols with different colors, which I expected. Beside the colors for the symbols are shallow and light.
So how to generate proper legends with correct symbols and how to make the colors of the symbols brighter and sharper?
I will deeply appreciate it if anyone can help.
Best regards
To make the colours brighter, just raise the alpha value.
For the legend, the order of the plotting matters here, it is better that the boxplot is plotted after the scatter plots. Also, to get for each point a place in the legend, it should b considered as a different graph, for that I used a loop to loop over the values of x, x0 and c. Here's the outcome:
import numpy as np
import matplotlib.pyplot as plt
# init figure
f, ax = plt.subplots()
# values
x = [24.4, 6.7, 19.7, 16.0, 25.1, 19.5, 10, 22.1]
x0 = np.random.normal(1, 0.05, len(x))
# labels and colours
c = ['r', 'b', 'c', 'm', 'y', 'g', 'm', 'k']
lab = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
# put the plots into a list
plots = []
for i in range(len(x)):
p = ax.scatter(x[i], x0[i], c=c[i], s=60, alpha=0.5) # raised the alpha to get sharper colors
plots.append(p)
# plot legends
plt.legend(plots,
labels=lab,
scatterpoints=1,
loc='upper left',
ncol=8,
fontsize=8)
# plot the box plot (the order here matters!)
ax.boxplot(x, vert=False, showmeans=True, showfliers=False)
# save the desired figure
plt.savefig('tt.png')
Output:

Circlify - change the colour of just one of the circles

I have this code:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({
'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
'Value': [10, 2, 23, 87, 12, 65]
})
circles = circlify.circlify(
df['Value'].tolist(),
show_enclosure=False,
target_enclosure=circlify.Circle(x=0, y=0, r=1)
)
# Create just a figure and only one subplot
fig, ax = plt.subplots(figsize=(10,10))
# Title
ax.set_title('Basic circular packing')
# Remove axes
ax.axis('off')
# Find axis boundaries
lim = max(
max(
abs(circle.x) + circle.r,
abs(circle.y) + circle.r,
)
for circle in circles
)
plt.xlim(-lim, lim)
plt.ylim(-lim, lim)
# list of labels
labels = df['Name']
# print circles
for circle, label in zip(circles, labels):
x, y, r = circle
ax.add_patch(plt.Circle((x, y), r, alpha=0.2, linewidth=2,color='#e6d4ff'))
plt.annotate(
label,
(x,y ) ,
va='center',
ha='center',
size=12
)
It produces this output:
I wanted to change the colour of just one of the circles (for example, the biggest circle).
I tried changing the colour from:
color='#e6d4ff'
to, for example, a list of colours:
color=['#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#e6d4ff','#ffc4c4']
with the error:
RGBA sequence should have length 3 or 4
I guess the error is saying if I'm providing a list, then the list should just be RGB dimensions.
Would someone be able to show me? (I couldn't see it in the python graph gallery e.g. [here][2] or the circlify doc here but maybe I've missed it?)
In each call to plt.Circle(...) you're only creating one circle, which has only one color. To assign different colors to different circles, the colors can be added into the for loop, e.g. : for circle, label, color in zip(circles, labels, colors):.
Note that circlify expects the list of values in sorted order, and that the returned list contains the circles sorted from smallest to largest. In your example code, D is the largest circle, but in your plot, you labeled it as F. Sorting the dataframe at the start and using that order helps to keep values and labels synchronized.
Here is the example code, having D as largest and with a different color (the code also changes a few plt. calls to ax. to be more consistent):
import matplotlib.pyplot as plt
import pandas as pd
import circlify
df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
'Value': [10, 2, 23, 87, 12, 65]})
df = df.sort_values('Value') # the order is now ['B', 'A', 'E', 'C', 'F', 'D']
circles = circlify.circlify(df['Value'].tolist(),
show_enclosure=False,
target_enclosure=circlify.Circle(x=0, y=0, r=1))
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Basic circular packing')
ax.axis('off')
ax.set_aspect('equal') # show circles as circles, not as ellipses
lim = max(max(abs(circle.x) + circle.r, abs(circle.y) + circle.r, )
for circle in circles)
ax.set_xlim(-lim, lim)
ax.set_ylim(-lim, lim)
labels = df['Name'] # ['B', 'A', 'E', 'C', 'F', 'D']
colors = ['#ffc4c4' if val == df['Value'].max() else '#e6d4ff' for val in df['Value']]
for circle, label, color in zip(circles, labels, colors):
x, y, r = circle
ax.add_patch(plt.Circle((x, y), r, alpha=0.7, linewidth=2, color=color))
ax.annotate(label, (x, y), va='center', ha='center', size=12)
plt.show()

How to resize the x-axis and make it different from the y-axis Matplotlib

I have the following development that I am working on with ElementTree, Pandas and Matplotlib modules in Python:
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
fig = plt.figure(figsize=(18, 20))
plt.suptitle(f'File PXML: {rootXML}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
fig.tight_layout()
i = 1
for name in names_list:
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
x = [n for n in range(len(datax))]
print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
ax = plt.subplot(num_names_list, 1, i)
ax.plot(x, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax.set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax.grid(alpha=0.4)
i += 1
plt.show()
I am getting the following error:
I have been looking for the error and I totally understand that the dimensions of x and y must be equal, but there is the possibility of making a graph where the x-axis is greater than the y-axis? and also the x-axis comes from a variable not related to the y-axis? how would this be?
The x-axis is the count of all the values it has in the Signal element of the xml file: I put it here because of how extensive it is and this value is larger than the y-axis, but how to contemplate the 3 values that I bring from the xml that are Singal Name, Signal Value as y-axis and Count of Signals as x-axis. I really appreciate your comments and help.
IIUC, you are trying to plot several stepped values agains their order of appearance (X-index) in XML file. Then you should plot against original dataframe's X values. I haven't changed your code much for style or such, just fixed a little.
import xml.etree.ElementTree as ET
import pandas as pd
from matplotlib import pyplot as plt
import random
file_xml = 'example_un_child4.xml'
def transfor_data_atri(rootXML):
file_xml = ET.parse(rootXML)
data_XML = [
{"Name": signal.attrib["Name"],
"Value": int(signal.attrib["Value"].split(' ')[0])
} for signal in file_xml.findall(".//Signal")
]
signals_df = pd.DataFrame(data_XML)
extract_name_value(signals_df)
def extract_name_value(signals_df):
#print(signals_df)
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
# Creation Graphic
#fig = plt.figure(figsize=(18, 20), sharex=True)
fig, ax = plt.subplots(nrows=num_names_list, figsize=(10, 15), sharex=True)
plt.suptitle(f'File PXML: {file_xml}', fontsize=20, fontweight='bold', color='SteelBlue', position=(0.75, 0.90))
#fig.tight_layout()
i = 1
for pos, name in enumerate(names_list):
# get data
data = signals_df[signals_df["Name"] == name]["Value"]
datax = signals_df["Name"]
# x = [n for n in range(len(data))]
#x = [n for n in range(len(datax))]
#print(x)
# get color
j = random.randint(0, len(colors) - 1)
# add subplots
#ax[pos] = plt.subplot(num_names_list, 1, i)
ax[pos].plot(data.index, data, drawstyle='steps', marker='o', color=colors[j], linewidth=3)
# plt.xticks(None)
ax[pos].set_ylabel(name, fontsize=12, fontweight='bold', color='SteelBlue', rotation=50, labelpad=45)
ax[pos].grid(alpha=0.4)
i += 1
fig.tight_layout()
plt.show()
transfor_data_atri(file_xml)

Adding Percentages to sns.countplot - how do I show percentages for two values within the categories?

Hi I'm trying to add percentages to my countplot with 5 categories and 2 values (old and younger). I've tried adding the def and loop from
How to add percentages on top of bars in seaborn?
My code:
plt.figure(figsize =(7,5))
ax = sb.countplot(data = df_x_1, x = 'concern_virus', hue = 'age')
plt.xticks(size =12)
plt.xlabel('Level of Concern', size = 14)
plt.yticks(size = 12)
plt.ylabel('Number of People', size = 12)
plt.title("Older and Younger People's Concern over the Virus", size = 16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right");
for p in ax.patches:
percentage = '{:.1f}%'.format(100 * p.get_height()/total)
x = p.get_x() + p.get_width()
y = p.get_height()
ax.annotate(percentage, (x, y),ha='center')
plt.show()
As you can see, the percentages don't make sense.
The problem seems to be with the variable that is undefined in the above code: total. total should be the number you want to call 100%, for example the total number of rows in the dataframe. That way all the displayed percentages sum up to 100.
Here is some sample code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
N = 250
df_x_1 = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], N),
'age': np.random.choice(['younger', 'older'], N)})
plt.figure(figsize=(7, 5))
ax = sns.countplot(data=df_x_1, x='concern_virus', order=['a', 'b', 'c', 'd', 'e'],
hue='age', hue_order=['younger', 'older'],
palette=['chartreuse', 'darkviolet'])
plt.xticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.yticks(size=12)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")
total = len(df_x_1)
for p in ax.patches:
percentage = f'{100 * p.get_height() / total:.1f}%\n'
x = p.get_x() + p.get_width() / 2
y = p.get_height()
ax.annotate(percentage, (x, y), ha='center', va='center')
plt.tight_layout()
plt.show()
To have the text in the center of the bar, it helps to choose ha='center' and add half the width to the x-position. Appending a newline to the text can help to position the text nicely on top of the bar. plt.tight_layout() can help to fit all the labels into the plot.
Seaborn lets you fix the order of the x-axis via order=.... The order of the legend elements and the corresponding colors can be set via hue_order=... and palette=....
PS: For the new question, with totals per age group, instead of directly looping through all the bars, a first loop can visit the groups:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
label_younger = 'younger'
label_older = 'older'
df_younger = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], 230)})
df_older = pd.DataFrame({'concern_virus': np.random.choice(['a', 'b', 'c', 'd', 'e'], 120)})
df_younger['age'] = label_younger
df_older['age'] = label_older
df_x_1 = pd.concat([df_younger, df_older], ignore_index=True)
plt.figure(figsize=(7, 5))
ax = sns.countplot(data=df_x_1, x='concern_virus', order=['a', 'b', 'c', 'd', 'e'],
hue='age', hue_order=[label_younger, label_older],
palette=['orangered', 'skyblue'])
plt.xticks(size=12)
plt.xlabel('Level of Concern', size=14)
plt.yticks(size=12)
plt.ylabel('Number of People', size=12)
plt.title("Older and Younger People's Concern over the Virus", size=16)
ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right")
for bars in ax.containers:
if bars.get_label() == label_younger:
group_total = len(df_younger)
else:
group_total = len(df_older)
for p in bars.patches:
# print(p.get_facecolor(), p.get_label())
percentage = f'{100 * p.get_height() / group_total:.1f}%\n'
x = p.get_x() + p.get_width() / 2
y = p.get_height()
ax.annotate(percentage, (x, y), ha='center', va='center')
plt.tight_layout()
plt.show()

How to put the axis for two subplots separately?

I am trying to have two sub-plots in the figure in the Python Script. But I am not able to set the axis separately according to my inputs. Can anybody help me to set the x-axis, y-axis for each of the sub-plot separately?
I am including the piece of code that I have done, which was not giving me the result.
fig = plt.figure()
ax = plt.subplot(121) # To show the ascending order
plt.xlabel ('RF Input Power (dBm)', fontsize = 'small')
plt.ylabel ('Gain (dB)', fontsize = 'small')
tx = plt.title('Gain Vs Ascending RFInputAmpl for ' + str(measuredFrequencyUnderTest) + 'MHz', fontsize = 'small')
axPlotAxis = plt.axis([rfInputMin, rfInputMax, -20.0, 30.0])
# Now, Plot all the gain stages across the RFInput
ax.plot(rfInput_Plot, lna_Pre_Plot, color = 'r', marker = '+', label = 'lna_Pre')
ax.plot(rfInput_Plot, lna_Post_Plot, color = 'm', marker = 'x', label = 'lna_Post')
ax.plot(rfInput_Plot, sampler1_Plot, color = 'k', marker = '*', label = 'Sampler1')
ax.plot(rfInput_Plot, sampler2_Plot, color = 'c', marker = 's', label = 'Sampler2')
ax.plot(rfInput_Plot, vga_Plot, color = 'b', marker = 'p', label = 'VGA')
ax.plot(rfInput_Plot, dagc1_Plot, color = 'g', marker = 'H', label = 'DAGC1')
ax.plot(rfInput_Plot, dagc2_Plot, color = 'y', marker = 'v', label = 'DAGC2')
# Put the Legend
ax.legend(loc='upper center', bbox_to_anchor = (1.3, -0.05), shadow=True,numpoints = 1, prop = legend_font_props, ncol = 3)
# Now, come to the second plot
ay = plt.subplot(122) # To show the descending order
plt.xlabel ('RF Input Power (dBm)', fontsize = 'small')
plt.ylabel ('Gain (dB)', fontsize = 'small', horizontalalignment = 'left')
ty = plt.title('Gain Vs Descending RF Input for '+ str(measuredFrequencyUnderTest)+ 'MHz', fontsize = 'small')
# Now, fix the x axis here in descending order
plt.axis([rfInputMax, rfInputMin, y_Min_Value, y_Max_Value])
plt.minorticks_on()
Is there something wrong that I am performing? Pls help me to correct it.
You can set independent scales very easily (from an ipython -pylab session):
In [8]: ax = plt.subplot(121)
In [9]: ay = plt.subplot(122)
In [10]: ax.set_xlim((-1,2))
In [11]: ax.set_ylim((10,20))
In [12]: ay.set_xlim((-10,4))
In [13]: ay.set_ylim((3,5))

Categories

Resources