How to make jitterplot on matplolib python - python

Here is my code (adapted from here):
df_1 = pd.DataFrame({'Cells' : np.arange(0,100), 'Delta_7' : np.random.rand(100,), 'Delta_10' : np.random.rand(100,), 'Delta_14' : np.random.rand(100,)}, columns = ['Cells','Delta_7', 'Delta_10', 'Delta_14'])
#figure
fig, ax1 = plt.subplots()
fig.set_size_inches(13, 10)
#c sequence
c = df_1['Delta_7']
#plot
plt.scatter(np.full((len(df_1), 1), 1), df_1['Delta_7'] , s = 50, c=c, cmap = 'viridis')
plt.scatter(np.full((len(df_1), 1), 2), df_1['Delta_10'] , s = 50, c=c, cmap = 'viridis')
plt.scatter(np.full((len(df_1), 1), 3), df_1['Delta_14'] , s = 50, c=c, cmap = 'viridis')
cbar = plt.colorbar()
I would like to make a beautiful jitterplot (like on R or seaborn) with matplotlib. The thing is that I would like to give each cell a color based on its 'Delta_7' value. And this color would be kept when plotting 'Delta_10' and 'Delta_14', that I didn't manage to do with seaborn.
Please, could you let me know if you have any clue (python package, coding tricks …)?
Kindly,

The positions of the dots can be obtained from the list returned by scatter. These positions can be jittered, for example only in the x-direction. Possibly the range of the x-axis needs to be extended a bit to show every displaced dot.
Here is some code to start experimenting:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def jitter_dots(dots):
offsets = dots.get_offsets()
jittered_offsets = offsets
# only jitter in the x-direction
jittered_offsets[:, 0] += np.random.uniform(-0.3, 0.3, offsets.shape[0])
dots.set_offsets(jittered_offsets)
df_1 = pd.DataFrame({'Cells': np.arange(0, 100),
'Delta_7': np.random.rand(100),
'Delta_10': np.random.rand(100),
'Delta_14': np.random.rand(100)})
fig, ax1 = plt.subplots()
columns = df_1.columns[1:]
c = df_1['Delta_7']
for i, column in enumerate(columns):
dots = plt.scatter(np.full((len(df_1), 1), i), df_1[column], s=50, c=c, cmap='plasma')
jitter_dots(dots)
plt.xticks(range(len(columns)), columns)
xmin, xmax = plt.xlim()
plt.xlim(xmin - 0.3, xmax + 0.3) # make some room to show the jittered dots
cbar = plt.colorbar()
plt.show()

Related

Python matplotlib polar coordinate is not plotting as it is supposed to be

I am plotting from a CSV file that contains Cartesian coordinates and I want to change it to Polar coordinates, then plot using the Polar coordinates.
Here is the code
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
df = pd.read_csv('test_for_plotting.csv',index_col = 0)
x_temp = df['x'].values
y_temp = df['y'].values
df['radius'] = np.sqrt( np.power(x_temp,2) + np.power(y_temp,2) )
df['theta'] = np.arctan2(y_temp,x_temp)
df['degrees'] = np.degrees(df['theta'].values)
df['radians'] = np.radians(df['degrees'].values)
ax = plt.axes(polar = True)
ax.set_aspect('equal')
ax.axis("off")
sns.set(rc={'axes.facecolor':'white', 'figure.facecolor':'white','figure.figsize':(10,10)})
# sns.scatterplot(data = df, x = 'x',y = 'y', s= 1,alpha = 0.1, color = 'black',ax = ax)
sns.scatterplot(data = df, x = 'radians',y = 'radius', s= 1,alpha = 0.1, color = 'black',ax = ax)
plt.tight_layout()
plt.show()
Here is the dataset
If you run this command using polar = False and use this line to plot sns.scatterplot(data = df, x = 'x',y = 'y', s= 1,alpha = 0.1, color = 'black',ax = ax) it will result in this picture
now after setting polar = True and run this line to plot sns.scatterplot(data = df, x = 'radians',y = 'radius', s= 1,alpha = 0.1, color = 'black',ax = ax) It is supposed to give you this
But it is not working as if you run the actual code the shape in the Polar format is the same as Cartesian which does not make sense and it does not match the picture I showed you for polar (If you are wondering where did I get the second picture from, I plotted it using R)
I would appreciate your help and insights and thanks in advance!
For a polar plot, the "x-axis" represents the angle in radians. So, you need to switch x and y, and convert the angles to radians (I also added ax=ax, as the axes was created explicitly):
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
data = {'radius': [0, 0.5, 1, 1.5, 2, 2.5], 'degrees': [0, 25, 75, 155, 245, 335]}
df_temp = pd.DataFrame(data)
ax = plt.axes(polar=True)
sns.scatterplot(x=np.radians(df_temp['degrees']), y=df_temp['radius'].to_numpy(),
s=100, alpha=1, color='black', ax=ax)
for deg, y in zip(df_temp['degrees'], df_temp['radius']):
x = np.radians(deg)
ax.axvline(x, color='skyblue', ls=':')
ax.text(x, y, f' {deg}', color='crimson')
ax.set_rlabel_position(-15) # Move radial labels away from plotted dots
plt.tight_layout()
plt.show()
About your new question: if you have an xy plot, and you convert these xy values to polar coordinates, and then plot these on a polar plot, you'll get again the same plot.
After some more testing with the data, I decided to create the plot directly with matplotlib, as seaborn makes some changes that don't have exactly equal effects across seaborn and matplotlib versions.
What seems to be happening in R:
The angles (given by "x") are spread out to fill the range (0,2 pi). This either requires a rescaling of x, or change how the x-values are mapped to angles. One way to get this, is subtracting the minimum. And with that result divide by the new maximum and multiply by 2 pi.
The 0 of the angles it at the top, and the angles go clockwise.
The following code should create the plot with Python. You might want to experiment with alpha and with s in the scatter plot options. (Default the scatter dots get an outline, which often isn't desired when working with very small dots, and can be removed by lw=0.)
ax = plt.axes(polar=True)
ax.set_aspect('equal')
ax.axis('off')
x_temp = df['x'].to_numpy()
y_temp = df['y'].to_numpy()
x_temp -= x_temp.min()
x_temp = x_temp / x_temp.max() * 2 * np.pi
ax.scatter(x=x_temp, y=y_temp, s=0.05, alpha=1, color='black', lw=0)
ax.set_rlim(y_temp.min(), y_temp.max())
ax.set_theta_zero_location("N") # set zero at the north (top)
ax.set_theta_direction(-1) # go clockwise
plt.show()
At the left the resulting image, at the right using the y-values for coloring (ax.scatter(..., c=y_temp, s=0.05, alpha=1, cmap='plasma_r', lw=0)):

Not getting the heatmap in the background using Matplotlib Python

I have tried this and got the result as in the image:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list("", ["red","grey","green"])
df = pd.read_csv('t.csv', header=0)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax = ax1.twiny()
# Scatter plot of positive points, coloured blue (C0)
ax.scatter(np.argwhere(df['real'] > 0), df.loc[df['real'] > 0, 'real'], color='C2')
# Scatter plot of negative points, coloured red (C3)
ax.scatter(np.argwhere(df['real'] < 0), df.loc[df['real'] < 0, 'real'], color='C3')
# Scatter neutral values in grey (C7)
ax.scatter(np.argwhere(df['real'] == 0), df.loc[df['real'] == 0, 'real'], color='C7')
ax.set_ylim([df['real'].min(), df['real'].max()])
index = len(df.index)
ymin = df['prediction'].min()
ymax= df['prediction'].max()
ax1.imshow([np.arange(index),df['prediction']],cmap=cmap,
extent=(0,index-1,ymin, ymax), alpha=0.8)
plt.show()
Image:
I was expecting one output where the color is placed according to the figure. I am getting green color and no reds or greys.
I want to get the image or contours spread as the values are. How I can do that? See the following image, something similar:
Please let me know how I can achieve this. The data I used is here: t.csv
For a live version, have a look at Tensorflow Playground
There are essentially 2 tasks required in a solution like this:
Plot the heatmap as the background;
Plot the scatter data;
Output:
Source code:
import numpy as np
import matplotlib.pyplot as plt
###
# Plot heatmap in the background
###
# Setting up input values
x = np.arange(-6.0, 6.0, 0.1)
y = np.arange(-6.0, 6.0, 0.1)
X, Y = np.meshgrid(x, y)
# plot heatmap colorspace in the background
fig, ax = plt.subplots(nrows=1)
im = ax.imshow(X, cmap=plt.cm.get_cmap('RdBu'), extent=(-6, 6, -6, 6), interpolation='bilinear')
cax = fig.add_axes([0.21, 0.95, 0.6, 0.03]) # [left, bottom, width, height]
fig.colorbar(im, cax=cax, orientation='horizontal') # add colorbar at the top
###
# Plot data as scatter
###
# generate the points
num_samples = 150
theta = np.linspace(0, 2 * np.pi, num_samples)
# generate inner points
circle_r = 2
r = circle_r * np.random.rand(num_samples)
inner_x, inner_y = r * np.cos(theta), r * np.sin(theta)
# generate outter points
circle_r = 4
r = circle_r + np.random.rand(num_samples)
outter_x, outter_y = r * np.cos(theta), r * np.sin(theta)
# plot data
ax.scatter(inner_x, inner_y, s=30, marker='o', color='royalblue', edgecolors='white', linewidths=0.8)
ax.scatter(outter_x, outter_y, s=30, marker='o', color='crimson', edgecolors='white', linewidths=0.8)
ax.set_ylim([-6,6])
ax.set_xlim([-6,6])
plt.show()
To keep things simple, I kept the colorbar range (-6, 6) to match the data range.
I'm sure this code can be changed to suit your specific needs. Good luck!
Here is a possible solution.
A few notes and questions:
What are the 'prediction' values in your data file? They do not seem to correlate with the values in the 'real' column.
Why do you create a second axis? What is represented on the bottom X-axis in your plot? I removed the second axis and labelled the remaining axes (index and real).
When you slice a pandas DataFrame, the index comes with it. You don't need to create a separate index (argwhere and arange(index) in your code). I simplified the first part of the code, where scatterplots are produced.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
cmap = LinearSegmentedColormap.from_list("", ["red","grey","green"])
df = pd.read_csv('t.csv', header=0)
print(df)
fig = plt.figure()
ax = fig.add_subplot(111)
# Data limits
xmin = 0
xmax = df.shape[0]
ymin = df['real'].min()
ymax = df['real'].max()
# Scatter plots
gt0 = df.loc[df['real'] > 0, 'real']
lt0 = df.loc[df['real'] < 0, 'real']
eq0 = df.loc[df['real'] == 0, 'real']
ax.scatter(gt0.index, gt0.values, edgecolor='white', color='C2')
ax.scatter(lt0.index, lt0.values, edgecolor='white', color='C3')
ax.scatter(eq0.index, eq0.values, edgecolor='white', color='C7')
ax.set_ylim((ymin, ymax))
ax.set_xlabel('index')
ax.set_ylabel('real')
# We want 0 to be in the middle of the colourbar,
# because gray is defined as df['real'] == 0
if abs(ymax) > abs(ymin):
lim = abs(ymax)
else:
lim = abs(ymin)
# Create a gradient that runs from -lim to lim in N number of steps,
# where N is the number of colour steps in the cmap.
grad = np.arange(-lim, lim, 2*lim/cmap.N)
# Arrays plotted with imshow must be 2D arrays. In this case it will be
# 1 pixel wide and N pixels tall. Set the aspect ratio to auto so that
# each pixel is stretched out to the full width of the frame.
grad = np.expand_dims(grad, axis=1)
im = ax.imshow(grad, cmap=cmap, aspect='auto', alpha=1, origin='bottom',
extent=(xmin, xmax, -lim, lim))
fig.colorbar(im, label='real')
plt.show()
This gives the following result:

How to add cross (X) on a heatmap cells like with R language?

I would like to add cross (X) on heatmap cells (depending on significance level, but the question is on adding the X).
Like in R-language (sig.level = XXX).
See the Python and R code used and the corresponding output images.
Thank you for your help.
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, center=0, vmin=-1, vmax=1, square=True, linewidths=0.5, fmt=".2f",
cbar_kws={"shrink": .65, "orientation": "horizontal", "ticks":np.arange(-1, 1+1, 0.2)},
annot = True, annot_kws={"weight": 'bold', "size":15})
corrplot(cor(subset (wqw, select =
c(fixed.acidity:quality,ratio.sulfur.dioxide))),
# compute the p matrix
p.mat = cor.mtest(subset
(wqw, select = c(fixed.acidity:quality,ratio.sulfur.dioxide))),
# significance level 0.01
sig.level = 0.01,
# Method to display : color (could be corcle, ...)
method = "color",
# color palette
col = colorRampPalette(c("#BB4444", "#EE9988",
"#FFFFFF", "#77AADD", "#4477AA"))(200),
)
```
The easy solution is to add a scatter plot with an X-shaped marker to cross out the unwanted cells.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
data = np.random.rand(10,10)
mask = np.zeros_like(data)
mask[np.triu_indices_from(mask)] = True
data_masked = np.ma.array(data, mask=mask)
fig, ax = plt.subplots()
im = ax.imshow(data_masked, cmap="YlGnBu", origin="upper")
fig.colorbar(im)
ax.scatter(*np.argwhere(data_masked.T < 0.4).T, marker="x", color="black", s=100)
plt.show()
The drawback of this is that the markersize (s) is independent of the number of cells and needs to be adjusted for different figure sizes.
An alternative is hence to draw some lines (an X are two crossed lines) at the respective positions. Here we create a function crossout(points, ax=None, scale=1, **kwargs), where scale is the percentage the lines shall take from each cell.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
def crossout(points, ax=None, scale=1, **kwargs):
ax = ax or plt.gca()
l = np.array([[[1,1],[-1,-1]]])*scale/2.
r = np.array([[[-1,1],[1,-1]]])*scale/2.
p = np.atleast_3d(points).transpose(0,2,1)
c = LineCollection(np.concatenate((l+p,r+p), axis=0), **kwargs)
ax.add_collection(c)
return c
data = np.random.rand(10,10)
mask = np.zeros_like(data)
mask[np.triu_indices_from(mask)] = True
data_masked = np.ma.array(data, mask=mask)
fig, ax = plt.subplots()
im = ax.imshow(data_masked, cmap="YlGnBu", origin="upper")
fig.colorbar(im)
crossout(np.argwhere(data_masked.T < 0.4), ax=ax, scale=0.8, color="black")
plt.show()
For scale=0.8 this looks like
Note that for a pcolormesh plot or a seaborn heatmap (which uses pcolormesh internally), one would need to add 0.5 to the data, i.e.
np.argwhere(data_masked.T < 0.4)+0.5

Matplotlib colormap, scatter plot passing a third variable for color: invalid RGBA argument

we are building our reports on matplotlib. Each page has multiple charts and some text.
In the report data there is over 100 locations, each location has a density. The idea is to plot the points on a map where the color (shade of red) represents the density of the location.
However, I do not understand the connection between the kwargs : c and cmap in the ax.scatter call, nor do I understand the role of color.Normalize in this application.
import pandas as pd
import matplotlib
import numpy as np
from pandas import Series, DataFrame
import csv
from scipy import stats
import matplotlib.pyplot as plt
import random
import matplotlib.colors as colors
# Get the data and transform
data = pd.read_csv('logHistThis.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)
dataMean = data['Density'].mean()
data = list(data['Density'])
# I was under the impresion that the data for the colormap
# had to be between 1 and 0 so did this:
aColorScale = []
def myColorScale(theData):
aColorScale = []
for x in theData:
this = x/100
aColorScale.append(this)
return aColorScale
aColorScale = myColorScale(data)
estimated_mu, estimated_sigma = stats.norm.fit(data)
xmin = min(data)
xmax = max(data)
x = np.linspace(xmin, xmax, 100)
pdf = stats.norm.pdf(x, loc=estimated_mu, scale=estimated_sigma)
thisRangeMin = np.log(27)
thisRangeMax = np.log(35)
q = [np.random.choice(data, 40)]
z = [ np.random.randint(1, 50, size=40)]
s = 100 *q
colormap = 'Reds'
normalize =matplotlib.colors.Normalize(vmin=xmin, vmax=xmax)
#plt.scatter(x,y,z,s=5, cmap=colormap, norm=normalize, marker='*')
fig = plt.figure(figsize=(10, 5), frameon=False, edgecolor='000000', linewidth = 1)
rect0 = .05, .05, .4, .9
rect1 = .5, .05, .4, .9
# This works great
ax1 = fig.add_axes(rect0)#<-----------x2TopTenSummary
ax1.hist(data, bins=13, normed=True, color='c', alpha=0.05)
#ax1.fill_between(x, pdf, where=(), alpha=.2)
ax1.fill_between(x, pdf, where=((x < thisRangeMax) & ( x > thisRangeMin)), alpha=.2, label='City Range')
ax1.vlines(dataMean, 0, stats.norm.pdf(dataMean, loc=estimated_mu, scale=estimated_sigma), color='r')
ax1.plot(x, pdf, 'k')
# This does not work :
# It just gives blue dots
ax2= fig.add_axes(rect1)
ax2= fig.add_axes(rect1)
ax2.scatter(q,z, s=200, cmap= 'Reds',norm=matplotlib.colors.Normalize(vmin=min(aColorScale) , vmax=max(aColorScale)))
# Tried to set the color map in a variety of ways:
# When kwarg 'c' is set to the variable 'aColorScale' i get the error
plt.show()
plt.close()
So my question is how do we incorporate the colormap in an application of this sort?
Multiple axes on a figure with a predetermined size (A4 or letter).
The color determination is a third variable z, (not x or y)
The color determinant is a float where 0 < z < 8
the call is ax not plt
The description of the application in the docs is unclear to me:
the doc for axes.scatter
the doc for color.normalize
I have seen plenty of examples where there is only one ax in the figure and the call is to plt.scatter... for example here
In our case x, y will be longitude, lattitude and the variable is 'data' a list or array of floats between 0 and 8.
Thanks
Okay the answer came from the PyCon Israel 2017 in this document by Tamir Lousky.
The normalization of the data and the correlation with color map happens with this block of code right here:
aColorScale = data
aColorScale = np.array(aColorScale)
norm = (aColorScale - aColorScale.min())/(aColorScale.max() - aColorScale.min())
cmap= plt.get_cmap('Reds')
colors = [cmap(tl) for tl in norm]#<---- thisRightHere
Then colors gets fed into ax2:
ax2= fig.add_axes(rect1)
ax2.scatter(q,z, s=200, color = colors)
I wish those who downvoted my question would say why, there was hours of searching and trying to find this.
Anyway here is the final image:
While I do have problems understanding the issue itself, I can tell you that the solution you have in your answer can be simplified to the usual way to plot scatters:
ax2= fig.add_axes(rect1)
ax2.scatter(q,z, c=aColorScale, s=200, cmap='Reds')

Setting the size of a matplotlib ColorbarBase object

I have an patch collection that I'd like to display a color map for. Because of some manipulations I do on top of the colormap, it's not possible for me to define it using a matplotlib.colorbar instance. At least not as far as I can tell; doing so strips some manipulations I do with my colors that blank out patches lacking data:
cmap = matplotlib.cm.YlOrRd
colors = [cmap(n) if pd.notnull(n) else [1,1,1,1]
for n in plt.Normalize(0, 1)([nullity for _, nullity in squares])]
# Now we draw.
for i, ((min_x, max_x, min_y, max_y), _) in enumerate(squares):
square = shapely.geometry.Polygon([[min_x, min_y], [max_x, min_y],
[max_x, max_y], [min_x, max_y]])
ax0.add_patch(descartes.PolygonPatch(square, fc=colors[i],
ec='white', alpha=1, zorder=4))
So I define a matplotlib.colorbar.ColorbarBase instance instead, which works:
matplotlib.colorbar.ColorbarBase(ax1, cmap=cmap, orientation='vertical',
norm=matplotlib.colors.Normalize(vmin=0, vmax=1))
Which results in e.g.:
The problem I have is that I want to reduce the size of this colorbar (specifically, the shrink it down to a specific vertical size, say, 500 pixels), but I don't see any obvious way of doing this. If I had a colorbar instance, I could adjust this easily using its axis property arguments, but ColorbarBase lacks these.
For further reference:
The example my implementation is based on.
The source code in question (warning: lengthy).
The size and shape is defined with the axis. This is a snippet from code I have where I group 2 plots together and add a colorbar at the top independently. I played with the values in that add_axes instance until I got a size that worked for me:
cax = fig.add_axes([0.125, 0.925, 0.775, 0.0725]) #has to be as a list - starts with x, y coordinates for start and then width and height in % of figure width
norm = mpl.colors.Normalize(vmin = low_val, vmax = high_val)
mpl.colorbar.ColorbarBase(cax, cmap = self.cmap, norm = norm, orientation = 'horizontal')
The question may be a bit old, but I found another solution that can be of help for anyone who is not willing to manually create a colorbar axes for the ColorbarBase class.
The solution below uses the matplotlib.colorbar.make_axes class to create a dependent sub_axes from the given axes. That sub_axes can then be supplied for the ColorbarBase class for the colorbar creation.
The code is derived from the matplotlib code example describe in here
Here is a snippet code:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.colorbar as mcbar
from matplotlib import ticker
import matplotlib.colors as mcolors
# Make some illustrative fake data:
x = np.arange(0, np.pi, 0.1)
y = np.arange(0, 2 * np.pi, 0.1)
X, Y = np.meshgrid(x, y)
Z = np.cos(X) * np.sin(Y) * 10
colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)] # R -> G -> B
n_bins = [3, 6, 10, 100] # Discretizes the interpolation into bins
cmap_name = 'my_list'
fig, axs = plt.subplots(2, 2, figsize=(9, 7))
fig.subplots_adjust(left=0.02, bottom=0.06, right=0.95, top=0.94, wspace=0.05)
for n_bin, ax in zip(n_bins, axs.ravel()):
# Create the colormap
cm = LinearSegmentedColormap.from_list(cmap_name, colors, N=n_bin)
# Fewer bins will result in "coarser" colomap interpolation
im = ax.imshow(Z, interpolation='nearest', origin='lower', cmap=cm)
ax.set_title("N bins: %s" % n_bin)
cax, cbar_kwds = mcbar.make_axes(ax, location = 'right',
fraction=0.15, shrink=0.5, aspect=20)
cbar = mcbar.ColorbarBase(cax, cmap=cm,
norm=mcolors.Normalize(clip=False),
alpha=None,
values=None,
boundaries=None,
orientation='vertical', ticklocation='auto', extend='both',
ticks=n_bins,
format=ticker.FormatStrFormatter('%.2f'),
drawedges=False,
filled=True,
extendfrac=None,
extendrect=False, label='my label')
if n_bin <= 10:
cbar.locator = ticker.MaxNLocator(n_bin)
cbar.update_ticks()
else:
cbar.locator = ticker.MaxNLocator(5)
cbar.update_ticks()
fig.show()

Categories

Resources