How can I change feature names fonts and image size - python

I am plotting feature importances from two different perspectives, as shown in this image (features importance).
How can I change the size of feature fonts and image size to allow all feature names to fit into the image size since I have several features?
This is my code. I hope someone can help me. Thanks a lot.
# Libraries
import matplotlib.pyplot as plt
import pandas as pd
from math import pi
# Set data: one row per group, one column per feature.
df = pd.DataFrame({
    'group': ['News', 'Response'],
    'user/favourites_count': [.73, 5.75],
    'name_length_in _char': [.32, 4.27],
    'user/followers_count': [13.82, 4],
    'screen_name_length_in _char': [.64, 3.31],
    'user/friends_count': [.84, 2.14],
    'user/description_length_in _char': [1.1, 1.83],
    'user/geo_enabled': [.29, 1.6],
    'user/listed_count': [11.35, .95],
    'reply_count': [9.43, 1.44],
    'reputation_score': [4.34, 1.31],
    'user_effect': [4.42, 1.2],
    'user_engagment': [2, .83],
    'favorite_count': [2.24, .81],
    'user/statuses_count': [2.51, .75],
    'user/media_count': [7.18, 0.056],
    'user/verified': [8.73, 0.03],
    'user_url_statues': [9.52, 3.4],
    'user_describtion_statues': [1.1, 3.6],
})

# ------- PART 1: Create background
# Every column except 'group' becomes one axis of the radar chart.
categories = list(df)[1:]
N = len(categories)

# Angle of each axis: the full circle divided evenly between the variables.
angles = [n / float(N) * 2 * pi for n in range(N)]
angles += angles[:1]  # repeat the first angle so each outline closes on itself

# Initialise the spider plot on an explicitly sized figure so that the long
# feature names have room around the circle.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, polar=True)

# Put the first axis on top and go clockwise.
ax.set_theta_offset(pi / 2)
ax.set_theta_direction(-1)

# Draw one axis per variable + add labels. The font size is passed directly
# to the labels instead of through plt.rc, and the labels are pushed outward
# so they do not overlap the plot area.
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, fontsize=8)
ax.tick_params(axis='x', pad=15)

# Draw ylabels. The tick positions must lie inside the radial limit below,
# otherwise none of them is drawn.
ax.set_rlabel_position(0)
ax.set_yticks([5, 10])
ax.set_yticklabels(["5", "10"], color="grey", fontsize=7)
ax.set_ylim(0, 14)

# ------- PART 2: Add plots
# One outline + translucent fill per row of the data. Line colours come from
# the default colour cycle; the fill colours are fixed per group.
for (_, row), fill_color in zip(df.iterrows(), ('b', 'r')):
    values = row.drop('group').tolist()
    values += values[:1]  # close the polygon
    ax.plot(angles, values, linewidth=2, linestyle='solid', label=row['group'])
    ax.fill(angles, values, fill_color, alpha=0.1)

# Add legend
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

# Let matplotlib shrink the axes so the tick labels stay inside the figure.
fig.tight_layout()

# Show the graph
plt.show()

Related

How can I make a problem matrix with percentage using matplotlib and seaborn?

I want to make this type of graph you see below.
I get that I can make a matrix graph with matplotlib
like so
# Two-colour map: the lower value is drawn white, the upper value red.
cmap = colors.ListedColormap(['white', 'red'])

# One row per participant, one column per problem (see the axis labels).
data = [
    [0, 0, 0, 0, 0, 1, 1, 1, 1],
    [0, 0, 0, 0, 0, 1, 0, 0, 1],
]

# pcolor draws the first row at the bottom, so the rows are reversed first
# to show them in the order they are written above.
flipped_rows = list(reversed(data))

plt.figure(figsize=(9, 5))
plt.pcolor(flipped_rows, cmap=cmap, edgecolors='k', linewidths=3)
plt.xlabel('Problem')
plt.ylabel('Particpant')
plt.show()
But how would I go about adding percentages to be included in this graph?
You can add a secondary x-axis (ax.twiny()), using the top axis for the numbering and the bottom axis to show the percentages.
Calling pcolor with a list of x and y positions that are 0.5 shifted will put the ticks and tick labels at integer positions. clip_on=False makes sure the outer cell borders have the same thickness as the rest. ax.invert_yaxis() lets you invert the y axis (so you can use data instead of data[::-1]).
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
cmap = ListedColormap(['white', 'orangered'])

# Random 0/1 matrix with 28 rows and 30 columns.
data = np.random.randint(0, 3, size=(28, 30)) % 2
data[:, 9] = 1  # one full column to simulate 100%
data[:, 11] = 0  # one empty column to simulate 0%
n_rows, n_cols = data.shape

# Cell edges are shifted by 0.5 so that cell centres, and therefore the
# ticks, fall on the integers 1..n.
x_edges = np.arange(n_cols + 1) + 0.5
y_edges = np.arange(n_rows + 1) + 0.5
column_ticks = range(1, n_cols + 1)

fig, ax = plt.subplots(figsize=(9, 5))
# clip_on=False keeps the outer cell borders from being clipped at the axes edge.
ax.pcolor(x_edges, y_edges, data,
          cmap=cmap, edgecolors='k', linewidths=3, clip_on=False)

# Bottom axis: one tick per column, labelled with the column mean in percent.
column_percentages = data.mean(axis=0) * 100
ax.set_yticks(range(1, n_rows + 1))
ax.set_xticks(column_ticks)
ax.set_xticklabels([f'{p:.0f}' for p in column_percentages])
ax.invert_yaxis()

# Top axis: same limits and tick positions, showing the column numbers.
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(column_ticks)
ax2.set_xlabel('Problem')

# Hide the tick marks on both axes; only the labels remain.
for axes in (ax, ax2):
    axes.tick_params(length=0)

ax.set_ylabel('Particpant')
plt.tight_layout()
plt.show()
Decreasing the font size (or increasing the figsize) leaves enough room to also show the percentage sign:
# Label each column with its mean as a whole-number percentage, including the
# '%' sign, using a smaller font.
ax.set_xticklabels([f'{p:.0f}%' for p in data.mean(axis=0) * 100], fontsize=8)

Add horizontal line with conditional coloring

I make a contourf plot using matplotlib.pyplot. Now I want to add a horizontal line (something like ax.vspan would work too) with conditional coloring at y = 0. I will show you what I have and what I would like to get. I want to drive the coloring from an array, let's say landsurface, that represents either land, ocean or ice. This array is filled with 1 (land), 2 (ocean) or 3 (ice) and has length len(locs) (i.e. one entry per location on the x-axis).
This is the plot code:
plt.figure()
ax = plt.axes()

# Contour levels from 0 up to (but not including) 50 in steps of 0.5;
# extend='max' draws values above the last level as well.
clev = np.arange(0., 50., .5)

# The vertical coordinate is the height relative to the surface.
height_above_ground = height - surfaceheight
filled = plt.contourf(locs, height_above_ground, var, clev, extend='max')

plt.xlabel('Location')
plt.ylabel('Height above ground level [m]')

# Colour bar for the filled contours just drawn.
cbar = plt.colorbar(filled)
cbar.ax.set_ylabel('o3 mixing ratio [ppb]')
plt.show()
This is what I have so far:
This is what I want:
Many thanks in advance!
Intro
I'm going to use a line collection.
Because I don't have your original data, I faked some data using a simple sine curve, and I plot on the baseline the color codes corresponding to small, middle and high values of the curve.
Code
Usual boilerplate, we need to explicitly import LineCollection
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection
Just to plot something, a sine curve (x ranges from 0 to 50)
# 101 evenly spaced sample positions from 0 to 50 (both ends included).
x = np.linspace(0, 50, 101)
# The curve to plot: a sine evaluated at every sample position.
y = np.sin(0.3*x)
The color coding from the curve values (corresponding to your surface types) to the LineCollection colors, note that LineCollection requires that the colors are specified as RGBA tuples but I have seen examples using color strings, bah!
# Classify every sample of the curve:
#   1 when near min  (y < -0.5)
#   2 when near 0    (-0.5 <= y < 0.5)
#   3 when near max  (y >= 0.5)
z = np.digitize(y, [-0.5, 0.5]) + 1

# RGBA tuple for each class code.
col_d = {
    1: (0.4, 0.4, 1.0, 1),  # blue, near min
    2: (0.4, 1.0, 0.4, 1),  # green, near zero
    3: (1.0, 0.4, 0.4, 1),  # red, near max
}

# prepare the list of colors, one per sample
colors = [col_d[code] for code in z.tolist()]
In a line collection we need a sequence of segments, here I have decided to place my coded line at y=0 but you can just add a constant to s to move it up and down.
I admit that forming the sequence of segments is a bit tricky...
# build the sequence of segments
# Baseline heights, one per sample of x; zeros put the coded line at y=0
# (add a constant to s to move it up or down).
s = np.zeros_like(x)
# Each segment joins two consecutive baseline points
# (x[i], s[i]) -> (x[i+1], s[i+1]); the array has shape (len(x) - 1, 2, 2).
points = np.column_stack((x, s))
segments = np.stack((points[:-1], points[1:]), axis=1)
# and fill the LineCollection
lc = LineCollection(
    segments,
    colors=colors,
    linewidths=5,
    antialiaseds=0,  # to prevent artifacts between lines
    zorder=3,  # to force drawing over the curve
)
# Minimal variant without the two extras above:
#   lc = LineCollection(segments, colors=colors, linewidths=5)
Finally, we put everything on the canvas
# plot the function and the line collection
fig, ax = plt.subplots()
ax.plot(x, y)
# The collection is not created through a plotting call, so it has to be
# attached to the axes explicitly.
ax.add_collection(lc)
# Display the figure when this runs as a plain script.
plt.show()
I would suggest adding an imshow() with proper extent, e.g.:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colorbar as colorbar
import matplotlib.colors as colors
### generate some data
np.random.seed(19680801)
npts = 50
x = np.random.uniform(0, 1, npts)
y = np.random.uniform(0, 1, npts)
X, Y = np.meshgrid(x, y)
z = x * np.exp(-X**2 - Y**2) * 100

### create a colormap of three distinct colors for each landmass
landmass_cmap = colors.ListedColormap(["b", "r", "g"])
x_land = np.linspace(0, 1, len(x))  ## this should be scaled to your "location"
## generate some fake landmass types (either 0, 1, or 2) with probabilities
y_land = np.random.choice(3, len(x), p=[0.1, 0.6, 0.3])
print(y_land)

fig = plt.figure()
ax = plt.axes()
clev = np.arange(0., 50., .5)

## adjust the "height" of the landmass
x0, x1 = 0, 1
y0, y1 = 0, 0.05  ## y1 is the "height" of the landmass

## The landmass codes are drawn as a one-row image stretched over `extent`.
## make sure that you're passing sensible zorder here and in your .contourf()
landmass_row = y_land.reshape((-1, len(x)))
im = ax.imshow(landmass_row, cmap=landmass_cmap, zorder=2,
               extent=(x0, x1, y0, y1))
contour_set = plt.contourf(x, y, z, clev, extend='max', zorder=1)

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.plot()
ax.set_xlabel('Location')
ax.set_ylabel('Height above ground level [m]')

## colorbar for the filled contours
cbar = plt.colorbar(contour_set)
cbar.ax.set_ylabel('o3 mixing ratio [ppb]')

## add a colorbar for your listed colormap
cax = fig.add_axes([0.2, 0.95, 0.5, 0.02])  # x-position, y-position, x-width, y-height
bounds = [0, 1, 2, 3]
norm = colors.BoundaryNorm(bounds, landmass_cmap.N)
landmass_bar_options = {
    'cmap': landmass_cmap,
    'norm': norm,
    'boundaries': bounds,
    'ticks': [0.5, 1.5, 2.5],  # the middle of each colour band
    'spacing': 'proportional',
    'orientation': 'horizontal',
}
cb2 = colorbar.ColorbarBase(cax, **landmass_bar_options)
cb2.ax.set_xticklabels(['sea', 'land', 'ice'])
plt.show()
yields:

matplotlib basemap plotting legend corresponding to size of points on map

I'm using matplotlib's basemap functionality to plot data points on a map. Each point is weighted by how many co-occurring points exist within a 5-km radius. I'd like to put a reference table that corresponds to different-sized outbreaks at the bottom; however, I can't figure out how to do this. This is my code so far:
# NOTE: `map` shadows the builtin of the same name; the name is kept so that
# any other code referring to this map object keeps working.
map = Basemap(llcrnrlon=-20., llcrnrlat=-40, urcrnrlon=160., urcrnrlat=40.,
              projection='cyl', lat_0=13.5317, lon_0=2.4604)
map.drawmapboundary(fill_color='paleturquoise')
map.fillcontinents(color='olivedrab', lake_color='paleturquoise')
map.drawcoastlines()
map.drawcountries()
map.drawstates()

# IDs that have already been drawn, so each group of points is plotted once.
used = set()
# NOTE(review): DATA is described as (latitude, longitude, id, count), while
# (i, j) are passed on as (x, y) — confirm the coordinate order.
for i, j, k, n in DATA:
    # Skip points that are not on land and IDs that were drawn before.
    if not map.is_land(i, j) or k in used:
        continue
    used.add(k)
    # Single points are drawn opaque, larger groups half transparent.
    alpha = 1 if n == 1 else 0.5
    # NOTE(review): the original indentation was lost; the x3 marker scaling
    # is assumed to apply to every point, not only to n == 1.
    map.plot(i, j, marker='o', color='r', ms=n * 3, alpha=alpha)
plt.show()
Note: DATA is a list of 4-tuples. Each 4-tuple holds (latitude, longitude, unique ID shared by points co-occurring within a 5x5 km square, number of points with the same unique ID).
result:
The most obvious option is to first create custom labels and handles in matplotlib and then to initialize a custom legend with them. For instance, if we choose a "showcase" sample of five point sizes, ranging from 1 to 5, you might want to do something along the lines of:
def outbreak_artist(size, alpha=0.5):
    """Return a single-point marker artist of a given size.

    Args:
        size: Number of outbreaks the marker stands for; the marker size is
            three times this value.
        alpha: Opacity of the marker. It is a parameter with a default so the
            function does not depend on a global ``alpha`` left over from
            other code.

    Returns:
        A ``Line2D`` holding one red circular marker and no connecting line,
        suitable as a legend handle.
    """
    # note that x3 factor corresponds to your
    # internal scaling within the for loop
    return plt.Line2D((0,), (0,), color='r', marker='o',
                      ms=size * 3, alpha=alpha, linestyle='')


sizes = [1, 2, 3, 4, 5]

# adapted from https://stackoverflow.com/a/4701285/4118756
# to place the legend beneath the figure neatly:
# shrink the axes to 90% of their height and move them up by the freed 10%.
ax = plt.gca()
box = ax.get_position()
ax.set_position([box.x0, box.y0 + box.height * 0.1,
                 box.width, box.height * 0.9])

# One handle and one label per showcased outbreak size, laid out in one row.
red_dots = [outbreak_artist(size) for size in sizes]
labels = ["{} outbreaks".format(size) for size in sizes]
ax.legend(red_dots, labels, loc='upper center',
          bbox_to_anchor=(0.5, -0.05), ncol=len(sizes))
plt.show()
I fiddled with the legend positioning a bit to bring it out of the plot following this post.
P.S.: I think I ran fig, ax = plt.subplots(figsize = (9.44, 4.76)) before making a Basemap to make the legend size align with the map size.

User defined legend in python

I have this plot in which some areas between curves are being filled by definition. Is there any way to include them in legend? Especially where those filled areas are overlapped and as well as that a new and different color is being appeared.
Or there is possibility to define an arbitrary legend regardless of the curves' data?
Using fill_between to plot your data will automatically include the filled area in the legend.
To include the areas where the two datasets overlap, you can combine the legend handles from both dataset into a single legend handle.
As pointed out in the comments, you can also define any arbitrary legend handle with a proxy.
Finally, you can define exactly what handles and labels you want to appear in the legend, regardless of the data plotted in your graph.
See the MWE below that illustrates the points stated above:
import matplotlib.pyplot as plt
import numpy as np
plt.close('all')

# Generate some data: 50 random values and their integer positions.
x = np.random.rand(50)
y = np.arange(len(x))
baseline = 0.5
shifted = x - 0.25

# Plot data: two filled areas measured from the baseline, then the three
# curves that bound them.
fig, ax = plt.subplots(figsize=(11, 4))
fillA = ax.fill_between(y, shifted, baseline, color='darkolivegreen', alpha=0.65, lw=0)
fillB = ax.fill_between(y, x, baseline, color='indianred', alpha=0.75, lw=0)
linec, = ax.plot(y, np.full(len(y), baseline), color='blue', lw=1.5)
linea, = ax.plot(y, x, color='orange', lw=1.5)
lineb, = ax.plot(y, shifted, color='black', lw=1.5)

# Define an arbitrary legend handle with a proxy:
rec1 = plt.Rectangle((0, 0), 1, 1, fc='blue', lw=0, alpha=0.25)

# Generate the legend: each entry pairs a label with a handle; a tuple of
# handles is drawn as a single combined legend entry.
legend_entries = [
    ('a', linea),
    ('b', lineb),
    ('c', linec),
    ('A', fillA),
    ('B', fillB),
    ('A+B', (fillA, fillB)),
    ('C', rec1),
    ('A+B+C', (fillA, fillB, rec1)),
]
handles = [handle for _, handle in legend_entries]
labels = [label for label, _ in legend_entries]
ax.legend(handles, labels, loc=2, ncol=4)

ax.axis(ymin=-1, ymax=2)
plt.show()
Yes, you are absolutely right ian_itor, tacaswell and Jean-Sébastien, user defined legend seems to be the unique solution, in addition I made different linewidth for those area to be distinguishable from the curves, and playing with alpha got the right color.
# Start from the handles and labels matplotlib collected for the axes.
handles, labels = ax.get_legend_handles_labels()
# Indices of the existing legend entries to keep.
display = (0, 1, 2, 3, 4)

# Thick line proxies standing in for the filled / overlapping areas.
overlap_1 = plt.Line2D((0, 1), (0, 0), color='firebrick', linestyle='-', linewidth=15, alpha=0.85)
overlap_2 = plt.Line2D((0, 1), (0, 0), color='darkolivegreen', linestyle='-', linewidth=15, alpha=0.65)
# This proxy was previously bound to a different name than the one used in
# the legend call below, which raised a NameError.
overlap_3 = plt.Line2D((0, 1), (0, 0), color='indianred', linestyle='-', linewidth=15, alpha=0.75)

# Keep the selected existing entries, then append the three proxies.
kept = [(handle, label)
        for i, (handle, label) in enumerate(zip(handles, labels))
        if i in display]
ax.legend([handle for handle, _ in kept] + [overlap_1, overlap_2, overlap_3],
          [label for _, label in kept] + ['D', 'F', 'G'])

How to make a scatter plot of different sizes, colour, and positions?

How can I make a scatter plot with random delta_x and delta_y positions, where each point has a certain frequency value (the color intensity changes depending on the intensity), i.e., a certain symbol?
Example plot: (from Alberdi, et al, 2013)
If I'm understanding you correctly, you're asking how to have scatter share a color scale but have different symbols for different groups, correct?
There are a few different ways to handle this.
The key is to call scatter multiple times (one for each different group), but pass in the same vmin, vmax, and cmap arguments.
As a complete (and arguably over-complex) example of reproducing the plot above:
import numpy as np
import matplotlib.pyplot as plt
# Generate data: random positions, an epoch for each point, and one of four
# frequency groups per point.
freq_groups = [1.7, 2.3, 5.0, 8.4]
num = 50
x = np.random.normal(0, 0.5, num)
y = np.random.normal(0.2, 0.5, num)
year = 9 * np.random.random(num) + 1993.5
frequencies = np.random.choice(freq_groups, num)
symbols = ['o', '^', 's', 'd']

# Plot data: one scatter call per frequency group, each with its own marker.
# All calls share cmap, vmin and vmax so the colours are comparable.
fig, ax = plt.subplots(figsize=(8, 9))
for freq, marker in zip(freq_groups, symbols):
    mask = np.isclose(freq, frequencies)
    # `c` carries the per-point colour values; the black outline goes in
    # `edgecolors`, because scatter does not accept `c` and `color` together.
    scat = ax.scatter(x[mask], y[mask], c=year[mask], s=100, marker=marker,
                      cmap='jet_r', vmin=year.min(), vmax=year.max(),
                      label='{:0.1f} GHz'.format(freq), edgecolors='black')

ax.legend(loc='upper left', scatterpoints=1)
ax.set(xlabel='Relative RA (mas)', ylabel='Relative Dec (mas)')
ax.invert_xaxis()

# Any of the scatter collections can drive the colour bar because they all
# use the same colour scale; the last one is used here.
cbar = fig.colorbar(scat, orientation='horizontal')
cbar.set_label('Epoch (year)')
# Show full year values on the colour bar instead of an offset notation.
cbar.formatter.useOffset = False
cbar.update_ticks()

fig.tight_layout()
plt.show()

Categories

Resources