I'm trying to write a python program that displays an animation of a map of the world where countries change color based on how much renewable energy use they have. I'm trying to have it display the colors for all countries in year 1960, then the colors for all countries in the year 1961, then 1962...
I'm using cartopy to add countries to the figure and basing their color off of values that I pull into a pandas dataframe from a SQL database. I was able to get the map to show what I want for one year like this:
However, I can't figure out how to animate it. I tried using FuncAnimation, but I'm really struggling to understand how it works. All the examples seem to have functions that return lines, but I'm not graphing lines or contours. Here is what I tried:
import sqlite3
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.animation as animation
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
from math import log
from math import exp
from matplotlib import colors
path = 'H:/USER/DVanLunen/indicator_data/world-development-indicators/'
os.chdir(path)
con = sqlite3.connect('database.sqlite')
# Grab :
# % of electricity from renewable sources EG.ELC.RNWX.ZS
# 1960 - 2013
Indicator_df = pd.read_sql('SELECT * '
'FROM Indicators '
'WHERE IndicatorCode in('
'"EG.ELC.RNWX.ZS"'
')'
, con)
# setup colorbar stuff and shape files
# logbase must be defined before the colour ramp below is built; assigning it
# after the loop raises NameError on the first iteration.
logbase = exp(1)
norm = mpl.colors.Normalize(vmin=0, vmax=30)
# 30 colours on a logarithmic ramp from red (val == 0) towards green.
colors_in_map = []
for i in range(30):
    val = log(i + 1, logbase) / log(31, logbase)
    colors_in_map.append((1 - val, val, 0))
cmap = colors.ListedColormap(colors_in_map)
shpfilename = shpreader.natural_earth(resolution='110m',
                                      category='cultural',
                                      name='admin_0_countries')
reader = shpreader.Reader(shpfilename)
countries_map = reader.records()
fig, ax = plt.subplots(figsize=(12, 6),
subplot_kw={'projection': ccrs.PlateCarree()})
def run(data):
"""Redraw every country for animation frame number `data` (FuncAnimation callback)."""
# Map the frame number onto the years 1960-2013, wrapping around after 54 frames.
year = 1960 + data % 54
logbase = exp(1)
# NOTE(review): countries_map is the module-level result of reader.records();
# presumably it is a one-shot iterator that is exhausted after the first
# call, so later frames would have nothing to loop over - confirm.
for n, country in enumerate(countries_map):
# Default styling, used when the country has no value for this year.
facecolor = 'gray'
edgecolor = 'black'
# Select the 'Value' entries matching this country's long name and the year.
indval = Indicator_df.loc[(Indicator_df['CountryName'] ==
country.attributes['name_long']) &
(Indicator_df['Year'] == year), 'Value']
# NOTE(review): .any() is also False when the only value is 0, so a
# country at 0 % keeps the gray default.
if indval.any():
# Log-scaled green share; it equals 1 when the value is 30.
greenamount = (log(float(indval) + 1, logbase) /
log(31, logbase))
facecolor = 1 - greenamount, greenamount, 0
ax.add_geometries(country.geometry, ccrs.PlateCarree(),
facecolor=facecolor, edgecolor=edgecolor)
ax.set_title('Percent of Electricity from Renewable Sources ' +
str(year))
ax.figure.canvas.draw()
cax = fig.add_axes([0.92, 0.2, 0.02, 0.6])
cb = mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm,
spacing='proportional')
cb.set_label('%')
ani = animation.FuncAnimation(fig, run, interval=200, blit=False)
plt.show()
Any help would be greatly appreciated. Thanks!
Some example data for Indicator_df (not real):
CountryName Year Value
United States 1960 5
United States 1961 10
United States 1962 20
United States 1963 30
There are actually several problems with how you've set up your run(), but the major one appears to be the enumerate(countries_map). The records() function returns a generator, which is exhausted once you've iterated over it and yields nothing when you loop over it a second time - I tried it separately from the animation to make sure.
That said, the problem can be avoided entirely by moving a lot of code out of the run(). Currently, even if it worked you're re-drawing every single country every frame, not just the ones with colors. It's both intensive and unnecessary - you don't need to draw any gray ones more than once.
I've restructured your code a bit and with the fake data I put in for the US and Argentina it works fine for me.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.animation as animation
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
from math import log
from math import exp
from matplotlib import colors
from shapely.geometry.multipolygon import MultiPolygon
# Grab :
# % of electricity from renewable sources EG.ELC.RNWX.ZS
# 1960 - 2013
# Make fake data
Indicator_df = pd.DataFrame({
'CountryName': ['United States'] * 4 + ['Argentina'] * 4,
'Year': [1960, 1961, 1962, 1963] * 2,
'Value': [5, 10, 20, 30] * 2
})
# setup colorbar stuff and shape files
norm = mpl.colors.Normalize(vmin=0, vmax=30)
colors_in_map = []
logbase = exp(1)
for i in range(30):
val = log(i + 1, logbase) / log(31, logbase)
colors_in_map.append((1 - val, val, 0))
cmap = colors.ListedColormap(colors_in_map)
shpfilename = shpreader.natural_earth(resolution='110m',
category='cultural',
name='admin_0_countries')
reader = shpreader.Reader(shpfilename)
countries_map = reader.records()
# These don't need to constantly be redefined, especially edgecolor
facecolor = 'gray'
edgecolor = 'black'
fig, ax = plt.subplots(figsize=(12, 6),
subplot_kw={'projection': ccrs.PlateCarree()})
# Draw all the gray countries just once in an init function
# I also make a dictionary for easy lookup of the geometries by country name later
geom_dict = {}
def init_run():
    """Draw every country once in the default colours and index its geometry.

    Passed to FuncAnimation as ``init_func``.  Each country is added to
    ``ax`` with the module-level ``facecolor``/``edgecolor`` and its geometry
    is stored in ``geom_dict`` under the record's ``NAME_LONG`` attribute so
    that ``run`` can look it up by country name.
    """
    for country in countries_map:
        geom = country.geometry
        # Store every shape as a MultiPolygon so all values in geom_dict have
        # the same type.  ``geom_type`` is used because the ``type`` alias is
        # deprecated in Shapely 2.
        if geom.geom_type == "Polygon":
            geom = MultiPolygon([geom])
        # add_geometries expects a collection of geometries, so wrap the
        # single shape in a list instead of relying on it being iterable.
        ax.add_geometries([geom],
                          ccrs.PlateCarree(),
                          facecolor=facecolor,
                          edgecolor=edgecolor)
        geom_dict[country.attributes['NAME_LONG']] = geom
def run(data):
    """Recolour the countries that have a value for frame `data`.

    `data` is the frame number supplied by FuncAnimation, starting from 0,
    so it maps directly onto the year ``1960 + data``.  Only countries with
    a row for that year are redrawn; the gray base map drawn by the init
    function is left alone.
    """
    year = 1960 + data
    # get a subset of the df for the current year
    year_df = Indicator_df[Indicator_df['Year'] == year]
    for _, row in year_df.iterrows():
        # Country names in the data do not necessarily match the shapefile's
        # names; skip unmatched rows instead of aborting with KeyError.
        geom = geom_dict.get(row['CountryName'])
        value = row['Value']
        if geom is None or pd.isna(value):
            continue
        greenamount = log(float(value) + 1, logbase) / log(31, logbase)
        # Values above 30 would push the ratio past 1 and produce an invalid
        # RGB triple, so clamp it to the [0, 1] range.
        greenamount = min(max(greenamount, 0.0), 1.0)
        shade = (1 - greenamount, greenamount, 0)
        ax.add_geometries([geom],
                          ccrs.PlateCarree(),
                          facecolor=shade,
                          edgecolor=edgecolor)
    # The title only needs to be set once per call to run()
    ax.set_title('Percent of Electricity from Renewable Sources ' + str(year))
cax = fig.add_axes([0.92, 0.2, 0.02, 0.6])
cb = mpl.colorbar.ColorbarBase(cax,
cmap=cmap,
norm=norm,
spacing='proportional')
cb.set_label('%')
ani = animation.FuncAnimation(fig,
run,
init_func=init_run,
frames=4,
interval=500,
blit=False)
ani.save(filename="test.gif")
The primary difference is that I'm not accessing the shpreader at all inside the run function. When making an animation, the only thing that should be in the run function are things that change, you don't need to re-draw everything every frame.
That said, this could be even better if you just keep the artist from the very first draw and just change the color of it in the run function, instead of doing a whole new ax.add_geometries. You'll have to look into how to change the color of a cartopy FeatureArtist for that.
Just to address the second point about not having to draw the whole shape again:
Instead of storing the shape information, store the feature artist, i.e.:
feature_artist = ax.add_geometries(country.geometry, ccrs.PlateCarree(),
facecolor=facecolor, edgecolor=edgecolor)
geom_dict[country.attributes['name_long']] = feature_artist
Then, in the updating loop, instead of calling ax.add_geometries again, call the following:
geom._feature._kwargs['facecolor'] = facecolor
This will update the facecolor. (You could also change the edgecolor - since it stays the same, you can leave that out.)
Related
I have a dataset with: 'latitudine'; 'longitudine'; 'created_at'.
'created_at' has a format such as 24/11/2019 01:00:00. It contains only two dates, 24 and 25 November 2019, at different hours.
I used this script to get a map with bubbles of different radii, but all the bubbles have the same color (red). Is it possible to get one color for each date (in this case two colors, one for 24 November and one for 25 November)?
This is the dataset: [dataset1]
import pandas as pd
# Load the dataset into a pandas dataframe.
# on_bad_lines='warn' skips malformed rows with a warning; it replaces
# error_bad_lines=False, which was removed in pandas 2.0.
df = pd.read_csv("autostrada_a_6.csv", delimiter=';', on_bad_lines='warn')
import folium
locations = df.groupby(by=['latitudine','longitudine'])\
.count()['created_at']\
.sort_values(ascending=False)
locations = locations.to_frame('value')
# Make an empty map
m = folium.Map(location=[df['latitudine'].mean(), df['longitudine'].mean()], tiles="Stamen Toner", zoom_start=8)
def get_radius(freq):
    """Return the marker radius used for a location that occurs `freq` times.

    Counts below 5 map to 5, counts below 15 map to 15, and everything else
    maps to 45.
    """
    if freq < 5:
        return 5
    if freq < 15:
        return 15
    # The largest bucket has no upper bound: the previous ``freq < 257``
    # cut-off fell through and returned None for bigger counts.
    return 45
# One circle per (latitudine, longitudine) pair, sized by how often it occurs.
# The index of `locations` is that pair, so it unpacks directly into lat/lon.
for (lat, lon), row in locations.iterrows():
    folium.CircleMarker(
        location=[lat, lon],
        # Look the count up by its column label; positional row[0] on a
        # labelled Series is deprecated in recent pandas.
        radius=get_radius(row['value']),
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)
m
Alternatively, I tried another script, but I have a problem: I would like the radius (in this case 's') to be based on the count:
# Basemap library
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
# Set the dimension of the figure
plt.rcParams["figure.figsize"]=15,10;
# Make the background map
m=Basemap(llcrnrlon=-180, llcrnrlat=-65, urcrnrlon=180, urcrnrlat=80, projection='merc');
m.drawmapboundary(fill_color='#A6CAE0', linewidth=0);
m.fillcontinents(color='grey', alpha=0.3);
m.drawcoastlines(linewidth=0.1, color="white");
locations = df.groupby(by=['latitudine','longitudine'])\
.count()['created_at']\
.sort_values(ascending=False)
locations = locations.to_frame('value')
# Make the background map
m=Basemap(llcrnrlon=-180, llcrnrlat=-65, urcrnrlon=180, urcrnrlat=80)
m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
m.fillcontinents(color='grey', alpha=0.3)
m.drawcoastlines(linewidth=0.1, color="white")
# prepare a color for each point depending on the continent.
df['label'] = pd.factorize(data['created_at'])[0]
# Add a point per position
m.scatter(
x=data['homelon'],
y=data['homelat'],
s=data['n']/6,
alpha=0.4,
c=data['label'],
cmap="Set1"
)
I am trying to plot the groundtrack of a satellite through a combination of packages, animate the satellite movement, mark a field of view from the subsatellite point (which is just arbitrary circles in this code) and then export the file as a video of some kind. So far, I have been able to do all of this except that when I try to export the video, the Nightshade feature doesn't animate so much as overlay and eventually blacks out most of the screen. Is there something I'm missing on how to properly animate the Nightshade feature? I know that I'm essentially recreating a new feature inside the update function everytime it runs a frame but I could not figure out how to update it as I do the scatter plots.
I've included my sample code below.
import pandas as pd
from sgp4.api import WGS72
from sgp4.api import Satrec
from skyfield.api import EarthSatellite, load, N, W, wgs84
import datetime
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import matplotlib.animation as animation
from cartopy.feature.nightshade import Nightshade
# CREATE THE SATELLITE DATA
epoch = datetime.date(1949, 12, 31)
sat = Satrec()
sat.sgp4init(
WGS72, # gravity model
'i', # 'a' = old AFSPC mode, 'i' = improved mode
5, # satnum: Satellite number
(datetime.date.today() - epoch).days, # epoch: days since 1949 December 31 00:00 UT
0, # bstar: drag coefficient (1/earth radii)
6.969196665e-13, # ndot (NOT USED): ballistic coefficient (revs/day)
0.0, # nddot (NOT USED): mean motion 2nd derivative (revs/day^3)
0.1, # ecco: eccentricity
280 * np.pi / 180, # argpo: argument of perigee (radians)
50 * np.pi / 180, # inclo: inclination (radians)
275 * np.pi / 180, # mo: mean anomaly (radians)
0.0472294454407, # no_kozai: mean motion (radians/minute)
50 * np.pi / 180, # nodeo: right ascension of ascending node (radians)
)
# DEFINE A FEW BASIC PARAMETERS FOR THE PROGRAM
# One revolution is 2*pi radians and no_kozai is the mean motion in radians
# per minute, so the period is 2*pi / no_kozai (not mean anomaly / mean motion).
P = 2 * np.pi / sat.no_kozai # min, period of orbit. LEOs orbit between 84-127 minutes
ts = load.timescale()
sat1 = EarthSatellite.from_satrec(sat, ts)
hours = np.arange(0, 6, 0.05)
time = ts.utc(2021, 6, 31, hours)
pos = sat1.at(time).position.km
pos_ec = sat1.at(time).ecliptic_position().km
sp = wgs84.subpoint(sat1.at(time))
latitude = sp.latitude
longitude = sp.longitude
elev = sp.elevation
# CREATE A DATAFRAME OF THE DATA FOR REVIEW LATER IF NEEDED
df = pd.DataFrame([time.utc_datetime(), latitude.degrees, longitude.degrees, elev.km],
index=['DTS', 'lat', 'lon', 'elev']).T
df.lat = df.lat.astype('float32')
df.lon = df.lon.astype('float32')
df.elev = df.elev.astype('float32')
df.set_index('DTS', inplace=True)
# ASSIGN RELEVANT DATA FOR THE SUBSATELLITE POINT
ssp = np.transpose(np.array([longitude.degrees, latitude.degrees]))
line = ssp.copy()
pos = np.where(np.diff(np.abs(line[:, 0] >= 0)))[0]
line[pos, :] = np.nan
# CREATE DATE TIME RANGES FOR USE WITH THE NIGHTSHADE FEATURE
base = datetime.datetime(2000, 1, 1)
dates = np.array([base + datetime.timedelta(hours=i) for i in range(len(hours))])
shades = [Nightshade(date, alpha=0.2) for date in dates]
### CREATE FIGURE AND IMAGE
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
ax.stock_img()
# plot lines that will show the ground track that will be animated
ax.plot(line[:, 0], line[:, 1], '--k')
# create a blank scatter to start
scatter = ax.scatter(None, None, color='r', s=30)
# initiate the circles around the scatter point
circle1 = plt.Circle((longitude.degrees[0], latitude.degrees[0]), radius=30, color='blue', alpha=0.3)
circle2 = plt.Circle((longitude.degrees[0], latitude.degrees[0]), radius=40, color='yellow', alpha=0.3)
# add the circles to the axis
ax.add_patch(circle1)
ax.add_patch(circle2)
# Add the nightshade feature (but set it to be invisible so it doesn't stay through the whole animation)
ns = ax.add_feature(Nightshade(base, alpha=0.0))
# Create all the updates for the animation
def update(i):
    """Advance the animation to frame `i`.

    Moves the sub-satellite marker and both circles to the i-th position in
    ``ssp`` and swaps in the i-th pre-built Nightshade feature.  Returns the
    artists that changed, as FuncAnimation needs when blitting.
    """
    # Rebind the module-level handle so the next frame can remove this one.
    global ns
    lon = ssp[i, 0]
    lat = ssp[i, 1]
    scatter.set_offsets(np.c_[lon, lat])
    # Remove the previous frame's shading before adding the next one;
    # otherwise every frame stacks another translucent layer and the map
    # gradually turns black.
    ns.remove()
    ns = ax.add_feature(shades[i], alpha=0.2)
    circle1.center = (lon, lat)
    circle2.center = (lon, lat)
    return scatter, circle1, circle2, ns
# Run the animation
anim = animation.FuncAnimation(plt.gcf(), update, frames=df.shape[0],init_func=None, interval=250, blit=True)
plt.show()
# WRITE THE VIDEO
Writer = animation.writers['ffmpeg']
writer = Writer(fps=10, metadata=dict(artist='Me'), bitrate=1800)
anim.save('gt.mp4', writer=writer)
I have produced 17 global plots that show the decadal averages in maximum surface ozone from 1850-2015. Rather than plotting them individually, I wish to create an animation that cycles through them (almost like a gif), i.e. have the same coastlines, axes and colour bar throughout but change what is being plotted as the contour.
Any help on how to adapt my code to do this would be greatly appreciated - thank you in advance!!
import numpy as np
import netCDF4 as n4
import matplotlib.pyplot as plt
from matplotlib import colorbar, colors
import matplotlib.cm as cm
import cartopy as cart
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import cartopy.feature as cfeature
nc = n4.Dataset('datafile.nc','r')
# daily maximum O3 VMR (units: mol mol-1)
sfo3max = nc.variables['sfo3max']
lon = nc.variables['lon'] # longitude
lat = nc.variables['lat'] # latitude
# (I manipulate the data to produce 17 arrays containing the decadal average O3 VMR which are
# listed below in sfo3max_avg)
sfo3max_avg = [sfo3max_1850_1860_avg, sfo3max_1860_1870_avg, sfo3max_1870_1880_avg,
sfo3max_1880_1890_avg, sfo3max_1890_1900_avg, sfo3max_1900_1910_avg,
sfo3max_1910_1920_avg, sfo3max_1920_1930_avg, sfo3max_1930_1940_avg,
sfo3max_1940_1950_avg, sfo3max_1950_1960_avg, sfo3max_1960_1970_avg,
sfo3max_1970_1980_avg, sfo3max_1980_1990_avg, sfo3max_1990_2000_avg,
sfo3max_2000_2010_avg, sfo3max_2010_2015_avg]
# find overall min & max values for colour bar in plots
min_sfo3max_avg = np.array([])
for i in sfo3max_avg:
sfo3max_avg_min = np.amin(i)
min_sfo3max_avg = np.append(min_sfo3max_avg, sfo3max_avg_min)
overall_min_sfo3max_avg = np.amin(min_sfo3max_avg)
max_sfo3max_avg = np.array([])
for i in sfo3max_avg:
sfo3max_avg_max = np.amax(i)
max_sfo3max_avg = np.append(max_sfo3max_avg, sfo3max_avg_max)
overall_max_sfo3max_avg = np.amax(max_sfo3max_avg)
# finally plot the 17 global plots of sfo3max_avg
for k in sfo3max_avg:
fig = plt.figure()
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines() # Adding coastlines
cs = ax.contourf(lon[:], lat[:], k[:], cmap='magma')
ax.set_title('Decadal Average of Maximum O3 Volume Mixing Ratio')
m = plt.cm.ScalarMappable(cmap=cm.magma)
m.set_array(i[:])
m.set_clim(overall_min_sfo3max_avg, overall_max_sfo3max_avg)
# Additional necessary information
cbar = plt.colorbar(m, boundaries=np.arange(overall_min_sfo3max_avg, overall_max_sfo3max_avg
+ 0.5e-08, 0.5e-08))
cbar.set_label('mol mol-1')
# Adding axis labels - latitude & longitude
gridl = ax.gridlines(color="black", linestyle="dotted", draw_labels=True)
gridl.xformatter=LONGITUDE_FORMATTER
gridl.yformatter=LATITUDE_FORMATTER
gridl.xlabels_top = False
gridl.ylabels_right = False
fig.set_size_inches(w=20,h=10)
plt.show() # show global plot
Several elements in your plotting can be kept out of the loop because they only need to be set up once. After you set up the plot elements you can update the plot and animate by looping over the list. This can be achieved by making use of matplotlib's interactive mode as shown in the code below:
import numpy as np
import netCDF4 as n4
import matplotlib
matplotlib.use("nbagg")
import matplotlib.pyplot as plt
from matplotlib import colorbar, colors
import matplotlib.cm as cm
import cartopy as cart
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import cartopy.feature as cfeature
nc = n4.Dataset('datafile.nc','r')
# daily maximum O3 VMR (units: mol mol-1)
sfo3max = nc.variables['sfo3max']
lon = nc.variables['lon'] # longitude
lat = nc.variables['lat'] # latitude
# (I manipulate the data to produce 17 arrays containing the decadal average O3 VMR which are
# listed below in sfo3max_avg)
sfo3max_avg = [sfo3max_1850_1860_avg, sfo3max_1860_1870_avg, sfo3max_1870_1880_avg,
sfo3max_1880_1890_avg, sfo3max_1890_1900_avg, sfo3max_1900_1910_avg,
sfo3max_1910_1920_avg, sfo3max_1920_1930_avg, sfo3max_1930_1940_avg,
sfo3max_1940_1950_avg, sfo3max_1950_1960_avg, sfo3max_1960_1970_avg,
sfo3max_1970_1980_avg, sfo3max_1980_1990_avg, sfo3max_1990_2000_avg,
sfo3max_2000_2010_avg, sfo3max_2010_2015_avg]
# find overall min & max values for colour bar in plots
min_sfo3max_avg = np.array([])
for i in sfo3max_avg:
sfo3max_avg_min = np.amin(i)
min_sfo3max_avg = np.append(min_sfo3max_avg, sfo3max_avg_min)
overall_min_sfo3max_avg = np.amin(min_sfo3max_avg)
max_sfo3max_avg = np.array([])
for i in sfo3max_avg:
sfo3max_avg_max = np.amax(i)
max_sfo3max_avg = np.append(max_sfo3max_avg, sfo3max_avg_max)
overall_max_sfo3max_avg = np.amax(max_sfo3max_avg)
#setup the plot elements
fig = plt.figure()
fig.set_size_inches(w=20,h=10)
ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines() # Adding coastlines
ax.set_title('Decadal Average of Maximum O3 Volume Mixing Ratio')
m = plt.cm.ScalarMappable(cmap=cm.magma)
m.set_array(i[:])
m.set_clim(overall_min_sfo3max_avg, overall_max_sfo3max_avg)
# Additional necessary information
cbar = plt.colorbar(m, boundaries=np.arange(overall_min_sfo3max_avg, overall_max_sfo3max_avg
+ 0.5e-08, 0.5e-08))
cbar.set_label('mol mol-1')
# plot here only the 1st item in your sfo3max_avg list.
cs = ax.contourf(lon[:], lat[:], sfo3max_avg[0][:], cmap='magma')
# Adding axis labels - latitude & longitude
gridl = ax.gridlines(color="black", linestyle="dotted", draw_labels=True)
gridl.xformatter=LONGITUDE_FORMATTER
gridl.yformatter=LATITUDE_FORMATTER
gridl.xlabels_top = False
gridl.ylabels_right = False
plt.ion() # set interactive mode
plt.show()
# finally plot the 17 global plots of sfo3max_avg
# Pin every frame to the overall min/max so a given colour means the same
# value in each decade and agrees with the shared colour bar; without
# vmin/vmax each contourf call rescales its colours to its own data.
for k in sfo3max_avg:
    cs = ax.contourf(lon[:], lat[:], k[:], cmap='magma',
                     vmin=overall_min_sfo3max_avg,
                     vmax=overall_max_sfo3max_avg)
    plt.gcf().canvas.draw()
    plt.pause(1) #control the interval between successive displays, currently set to 1 sec.
I would like to produce a heatmap in Python, similar to the one shown, where the size of the circle indicates the size of the sample in that cell. I looked in seaborn's gallery and couldn't find anything, and I don't think I can do this with matplotlib.
It's the inverse. While matplotlib can do pretty much everything, seaborn only provides a small subset of options.
So using matplotlib, you can plot a PatchCollection of circles as shown below.
Note: You could equally use a scatter plot, but since scatter dot sizes are in absolute units it would be rather hard to scale them into the grid.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
N = 10
M = 11
ylabels = ["".join(np.random.choice(list("PQRSTUVXYZ"), size=7)) for _ in range(N)]
xlabels = ["".join(np.random.choice(list("ABCDE"), size=3)) for _ in range(M)]
x, y = np.meshgrid(np.arange(M), np.arange(N))
s = np.random.randint(0, 180, size=(N,M))
c = np.random.rand(N, M)-0.5
fig, ax = plt.subplots()
R = s/s.max()/2
circles = [plt.Circle((j,i), radius=r) for r, j, i in zip(R.flat, x.flat, y.flat)]
col = PatchCollection(circles, array=c.flatten(), cmap="RdYlGn")
ax.add_collection(col)
ax.set(xticks=np.arange(M), yticks=np.arange(N),
xticklabels=xlabels, yticklabels=ylabels)
ax.set_xticks(np.arange(M+1)-0.5, minor=True)
ax.set_yticks(np.arange(N+1)-0.5, minor=True)
ax.grid(which='minor')
fig.colorbar(col)
plt.show()
Here's a possible solution using Bokeh Plots:
import pandas as pd
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np
output_notebook()
d = dict(x = ['A','A','A', 'B','B','B','C','C','C','D','D','D'],
y = ['B','C','D', 'A','C','D','B','D','A','A','B','C'],
corr = np.random.uniform(low=-1, high=1, size=(12,)).tolist())
df = pd.DataFrame(d)
df['size'] = np.where(df['corr']<0, np.abs(df['corr']), df['corr'])*50
#added a new column to make the plot size
colors = list(reversed(RdBu[9]))
exp_cmap = LinearColorMapper(palette=colors,
low = -1,
high = 1)
p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width=700,
plot_height=450, title="Correlation",
toolbar_location=None, tools="hover")
p.scatter("x","y",source=df, fill_alpha=1, line_width=0, size="size",
fill_color={"field":"corr", "transform":exp_cmap})
p.x_range.factors = sorted(df['x'].unique().tolist())
p.y_range.factors = sorted(df['y'].unique().tolist(), reverse = True)
p.xaxis.axis_label = 'Values'
p.yaxis.axis_label = 'Values'
bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
p.add_layout(bar, "right")
show(p)
One option is to use matplotlib's scatter plots with legends and grid. You can specify size of those circles with specifying the scales. You can also change the color of each circle. You should somehow specify X,Y values so that the circles sit straight on lines. This is an example I got from here:
volume = np.random.rayleigh(27, size=40)
amount = np.random.poisson(10, size=40)
ranking = np.random.normal(size=40)
price = np.random.uniform(1, 10, size=40)
fig, ax = plt.subplots()
# Because the price is much too small when being provided as size for ``s``,
# we normalize it to some useful point sizes, s=0.3*(price*3)**2
scatter = ax.scatter(volume, amount, c=ranking, s=0.3*(price*3)**2,
vmin=-3, vmax=3, cmap="Spectral")
# Produce a legend for the ranking (colors). Even though there are 40 different
# rankings, we only want to show 5 of them in the legend.
legend1 = ax.legend(*scatter.legend_elements(num=5),
loc="upper left", title="Ranking")
ax.add_artist(legend1)
# Produce a legend for the price (sizes). Because we want to show the prices
# in dollars, we use the *func* argument to supply the inverse of the function
# used to calculate the sizes from above. The *fmt* ensures to show the price
# in dollars. Note how we target at 5 elements here, but obtain only 4 in the
# created legend due to the automatic round prices that are chosen for us.
kw = dict(prop="sizes", num=5, color=scatter.cmap(0.7), fmt="$ {x:.2f}",
func=lambda s: np.sqrt(s/.3)/3)
legend2 = ax.legend(*scatter.legend_elements(**kw),
loc="lower right", title="Price")
plt.show()
Output:
I don't have enough reputation to comment on Delenges' excellent answer, so I'll leave my comment as an answer instead:
R.flat doesn't order the way we need it to, so the circles assignment should be:
# R has shape (N, M): rows follow y (i) and columns follow x (j), so it must
# be indexed as R[i, j]; R[j][i] runs out of bounds once j reaches M - 1.
circles = [plt.Circle((j, i), radius=R[i, j]) for j, i in zip(x.flat, y.flat)]
Here is an easy example to plot circle_heatmap.
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.datasets import load_wine as load_data
from psynlig import plot_correlation_heatmap
plt.style.use('seaborn-talk')
data_set = load_data()
data = pd.DataFrame(data_set['data'], columns=data_set['feature_names'])
#data = df_corr_selected
kwargs = {
'heatmap': {
'vmin': -1,
'vmax': 1,
'cmap': 'viridis',
},
'figure': {
'figsize': (14, 10),
},
}
plot_correlation_heatmap(data, bubble=True, annotate=False, **kwargs)
plt.show()
I have been pulling my hair out for a while over this. I am trying to use mpldatacursor along with matplotlib to provide a tooltip functionality on scatter plots. Each point has some data associated with it which I would like to show when the point is clicked.
Here is a minimal (not) working example:
import numpy as np
import mpldatacursor
import string
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as mpl
nations = ['Russia', 'America', 'China', 'France']
data = list()
idx = list()
np.random.seed(42) #Seed for repeatability
# Random data
for (id, nation) in enumerate(nations):
for i in range(0,10):
data.append((id+1)*np.random.random((2,1)))
name = list(string.ascii_uppercase[20:])
np.random.shuffle(name)
idx.append(nation + '-' + ''.join(name))
mpl.figure()
data = np.squeeze(np.asarray(data))
m, n = 0, 9
# Plot by group
for (id,nation) in enumerate(nations):
mpl.scatter(data[m:n,0] , data[m:n,1] , label=nation)
m = n + 1
n += 10
formatter = lambda **kwargs: ', '.join(kwargs['point_label'])
mpl.legend()
mpldatacursor.datacursor(formatter=formatter, point_labels=idx)
mpl.show(block=True)
But when I do this, the tooltips don't match the legends. Further only labels starting with Russia and USA show up in the plot. What am I doing wrong?
Usually you would have your data in a table or, for the sake of the example, several lists. One would hence probably create a single scatter plot from the data columns and use a mapping of names to numbers to create the colors in the scatter.
Then one can use the matplotlib pick_event to get the data out of the respective list, given the index of the point on which the click happened.
This all does not require any external packages like datacursor.
import numpy as np; np.random.seed(42)
import string
from matplotlib import pyplot as plt
nations = ['Russia', 'America', 'China', 'France']
#Create lists data, nat, idx
nat = np.random.choice(nations, 50)
data = np.random.rand(50,2)
strings = ["".join(np.random.choice(list(string.ascii_uppercase), 7)) for _ in range(50)]
idx = ["{}-{}".format(n,w) for n,w in zip(nat,strings)]
labels, i = np.unique(nat, return_inverse=True)
fig, ax = plt.subplots()
scatter = ax.scatter(data[:,0], data[:,1], c=i, cmap="RdYlGn", picker=5)
rect = lambda c: plt.Rectangle((0,0),1,1, color=scatter.cmap(scatter.norm(c)))
handles = [rect(c) for c in np.unique(i)]
plt.legend(handles, labels)
#Create annotation
annot = ax.annotate("", xy=(0,0), xytext=(-20,20),textcoords="offset points",
bbox=dict(boxstyle="round", fc="w"),
arrowprops=dict(arrowstyle="->"))
annot.set_visible(False)
#Create event handler
def onpick(evt):
    """Show the label of the picked scatter point in the annotation box.

    Connected to the figure's ``pick_event``.  The annotation is moved to the
    first picked point and filled with the matching entry of ``idx``; a pick
    made with mouse button 3 hides the annotation instead.
    """
    if evt.artist == scatter:
        # Several points can lie within the pick radius; use the first one.
        ind = evt.ind[0]
        annot.xy = (data[ind])
        annot.set_text(idx[ind])
        annot.set_visible(True)
    if evt.mouseevent.button == 3:
        annot.set_visible(False)
    fig.canvas.draw_idle()
fig.canvas.mpl_connect("pick_event", onpick)
plt.show()
The issue was that each call to scatter by matplotlib was creating a new artist object. The workaround is based on the doc-string in the source code.
point_labels : sequence or dict, optional
Labels for "subitems" of an artist, passed to the formatter
function as the point_label kwarg. May be either a single
sequence (used for all artists) or a dict of artist:sequence pairs.
It does involve the import of a protected matplotlib module/member. This seems to work as I want:
import numpy as np
import mpldatacursor
import string
import matplotlib
from matplotlib import _pylab_helpers as pylab_helpers
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as mpl
nations = ['Russia', 'America', 'China', 'France']
data = list()
idx = list()
np.random.seed(42)
for (index, nation) in enumerate(nations):
for i in range(0,10):
data.append((index + 1) * np.random.random((2, 1)))
name = list(string.ascii_uppercase[20:])
np.random.shuffle(name)
idx.append(nation + '-' + ''.join(name))
data = np.squeeze(np.asarray(data))
# Every nation owns the same number of consecutive rows in data/idx.
per_nation = len(idx) // len(nations)
artist_labels = list()
mpl.figure()
for (index, nation) in enumerate(nations):
    # Half-open slice [m, n): the previous 0:9, 10:19, ... bounds dropped the
    # last point of every group together with its label.
    m, n = index * per_nation, (index + 1) * per_nation
    mpl.scatter(data[m:n,0] , data[m:n,1] ,label=nation)
    artist_labels.append(idx[m:n])
def plotted_artists(ax):
    """Return the artists held by `ax` as one flat sequence.

    Concatenates, in this order, ``ax.lines``, ``ax.patches``,
    ``ax.collections``, ``ax.images`` and ``ax.containers``.
    """
    combined = ax.lines
    for group in (ax.patches, ax.collections, ax.images, ax.containers):
        # ``+`` builds a new sequence, so the axes' own lists are not modified.
        combined = combined + group
    return combined
def formatter(**kwargs):
    """Return the tooltip text for a picked point.

    Takes the last entry of the ``point_label`` keyword argument; the entry
    is removed from that list with ``pop()``.
    """
    labels = kwargs['point_label']
    return labels.pop()
managers = pylab_helpers.Gcf.get_all_fig_managers()
figs = [manager.canvas.figure for manager in managers]
axes = [ax for fig in figs for ax in fig.axes]
artists = [artist for ax in axes for artist in plotted_artists(ax)]
my_dict = dict(zip(artists, artist_labels))
mpldatacursor.datacursor(formatter=formatter, point_labels=my_dict)
mpl.legend()
mpl.show(block=True)
Assuming you simply want names, this seems to work correctly if you change the mpldatacursor.datacursor call to use '{label}' as in the first example on the mpldatacursor website,
mpldatacursor.datacursor(formatter='{label}'.format)
I think the problem is with kwargs and the lambda function. If you want further data in your tooltip, it may be best to add this to the label on plt.scatter, using a separate call for each point, e.g.
import numpy as np
import mpldatacursor
import string
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as plt
nations = ['Russia', 'America', 'China', 'France']
cDict = {'Russia':'r', 'America':'b', 'China':'g', 'France':'c'}
np.random.seed(42) #Seed for repeatability
# Random data
for (id, nation) in enumerate(nations):
for i in range(0,10):
x = (id+1)*np.random.random((2,1))
name = list(string.ascii_uppercase[20:])
np.random.shuffle(name)
plt.scatter(x[0], x[1], c=cDict[nation], label=nation + '-' + ''.join(name))
mpldatacursor.datacursor(formatter='{label}'.format)
plt.show(block=True)