Fit kernel distribution to my data - python

# Load whitespace-separated numeric rows from a text file and draw a
# 15-bin histogram of the first column.
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
import matplotlib as mpl
import seaborn as sns
from scipy.stats import gaussian_kde
from numpy import linspace,hstack
LINE_WIDTH = 3
# NOTE(review): the path is an empty string here — presumably elided in the post.
filename=('')
# Parse every non-blank line into floats, one list entry per line.
# NOTE(review): on Python 3 `map` returns an iterator, so this likely needs
# list(map(...)) to produce a numeric 2-D array — confirm the target version.
data=[ map(float, line.split()) for line in open(filename,'r') if line.strip()]
dataM=np.array(data)
# Column-wise mean and standard deviation (axis=0).
meandata=np.mean(dataM,axis=0)
SD = np.std(dataM,axis=0)
sns.set_palette("hls")
mpl.rc("figure", figsize=(8, 4))
# Evenly spaced grid spanning mean +/- 4 standard deviations of the first
# column, with as many points as there are samples in that column.
xs = np.linspace(meandata[0]-(4 * SD[0]) ,meandata[0]+( 4 * SD[0]), dataM[:,0].size)
# The first column is the data that gets plotted.
ys=dataM[:,0]
n,bins,patches=plt.hist(ys,15)
I get this plot.
I want to plot a Gaussian kernel density estimate over my histogram, but I get the error TypeError: 'module' object is not callable
when I try to do this:
# Fit a Gaussian KDE to the samples and try to overlay it on a histogram.
my_pdf = gaussian_kde(ys)
# Evaluate the density at 1000 evenly spaced points between 30 and 100.
x = linspace(30,100,1000)
# NOTE(review): `plt` is the matplotlib.pyplot module, so calling it directly
# is what raises "TypeError: 'module' object is not callable"; presumably
# plt.plot(...) was intended.
plt(x,my_pdf(x),'r') # distribution function
# NOTE(review): `normed` was replaced by `density` in newer matplotlib
# releases — confirm against the installed version.
plt.hist(ys,normed=1,alpha=.3) # histogram
plt.show()
What am I doing wrong?

You can do this directly using seaborn. It would be something like this:
# Plot a histogram of the data with a fitted normal distribution overlaid.
import pandas as pd
import seaborn as sns
import scipy.stats
import matplotlib.pyplot as plt
data = pd.read_csv('input.txt')
# kde=False turns off the kernel density curve; fit=scipy.stats.norm asks
# distplot to fit that distribution and draw it over the histogram.
# NOTE(review): distplot is deprecated in recent seaborn releases — confirm
# against the installed version.
sns.distplot(data, kde=False, fit=scipy.stats.norm)
plt.show()
For a kde plot just do:
# With its default arguments distplot draws the histogram together with a
# kernel density estimate curve.
sns.distplot(data);

Related

Create legends in scatter plt

I create a mask of my dataset to plot only the No Animals materials, and when I plot the masked data I have a problem with the legend: only the first material appears in it, and I don't know how to add the other two materials.
# Embed a matrix with UMAP, select a subset of rows with a boolean mask,
# and scatter-plot the subset coloured by material.
import numpy as np
import umap
import umap.plot
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display,HTML
import cufflinks as cf
cf.set_config_file(sharing='public',theme='ggplot',offline=True)
import seaborn as sns
palpations = np.load('big_matrix_16384.npz',allow_pickle=True)
X = palpations['arr_0']
# Fit UMAP on X, then project X into the learned embedding.
embedding = umap.UMAP(n_neighbors=50,
min_dist=0.2,
metric='correlation').fit(X)
emb = embedding.transform(X)
# NOTE(review): `Data` is not defined anywhere in this snippet — presumably a
# DataFrame with a "Tipo" column, aligned row-for-row with X; confirm.
mask_1 = Data["Tipo"]=="Animal"
emb_tipo_1 = emb[mask_1]
cmap = plt.cm.Spectral
# Map each material name to an index into the "Set2" palette.
# NOTE(review): `data_tipo_1` is not defined in this snippet either —
# presumably the rows of `Data` selected by mask_1; confirm.
c =[sns.color_palette("Set2")[x] for x in data_tipo_1.Material.map({"bone":0, "cartilage":1, "liver_raw_piece1":2})]
# NOTE(review): all points are drawn by a single scatter call whose label is
# the whole array of unique materials; this is presumably why the legend
# does not show one entry per material.
plt.scatter(emb_tipo_1[:,0],
emb_tipo_1[:,1],
c=c,
label=np.unique(data_tipo_1.Material),s=10)
plt.gca().set_aspect("equal","datalim")
plt.title("UMAP muestras Animales.")
plt.legend()
enter image description here

How to plot heat map with interpolation in python?

Code Output Image
Desired Image
My CSV data consists of X-axis values, Y-axis values and hardness values, and I want to plot a smooth, interpolated heat map rather than one drawn in boxes like the code output image above.
DATA:
# For every file in a directory, read it as an Excel sheet, pivot the
# hardness values onto a Y-by-X grid and draw it as a seaborn heat map.
# NOTE(review): the indentation of the `for` loop body below appears to have
# been lost in this paste; the statements after the `for` line presumably
# belong inside the loop.
import cv2
from skimage.io import imread, imshow
from skimage.transform import resize
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
import os
from tqdm import tqdm
import pandas as pd
import seaborn as sns
from sklearn.neighbors import KernelDensity
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import style
from astropy.convolution import convolve, Gaussian2DKernel
from scipy.ndimage.filters import gaussian_filter
path = r"C:\Users\patels\Desktop\Ti/"
# Every directory entry is treated as an input file.
ids = os.listdir(path)
#print(ids)
for n, id_ in tqdm(enumerate(ids), total=len(ids)):
data = pd.read_excel(path+id_)
print(path+id_)
# Keep only the hardness value and the two position columns.
df1 = data[['HV 0.2', 'X pos. [mm]', 'Y pos. [mm]']]
# Rows are Y positions, columns are X positions, cells are hardness values.
heatmap1_data = pd.pivot_table(df1, values='HV 0.2', index=['Y pos. [mm]'], columns='X pos. [mm]')
plt.figure() #this creates a new figure on which your plot will appear
# The colour scale is fixed to the range 300-400.
heatmap1 = sns.heatmap(heatmap1_data, cmap="viridis", vmin=300, vmax=400)
plt.title(ids[n]+'Ti Hardness Map')

How plot many points 3d in matplotlib

I am trying to plot 8000 points in three dimensions (x, y, z) of a terrain with matplotlib using the function contourf. When I run the code, I get the error
'OverflowError: In draw_path_collection: Exceeded cell block limit'
I tried to solve this with "mpl.rcParams['agg.path.chunksize'] = 20000",
but this did not resolve the issue. Here is my code:
# Read terrain samples (x, y, z) from a CSV file, interpolate z onto a grid
# and draw filled contours.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from scipy.interpolate import griddata
import pandas as pd
import matplotlib as mpl
datos = pd.read_csv('zrh_terrain.txt', header =0)
# Only the first `dats` rows of the file are used below.
dats=500
mpl.rcParams['agg.path.chunksize'] = 20000
# Columns 0, 1 and 2 are taken as X, Y and Z respectively.
X=datos.iloc[0:dats,0].values
Y=datos.iloc[0:dats,1].values
Z=datos.iloc[0:dats,2].values
# Ten evenly spaced levels between the minimum and maximum of Z; only the
# first and last are used, as vmin/vmax.
dt_bar=np.linspace(Z.min(),Z.max(),10)
# NOTE(review): the grid is built from the raw sample coordinates, so it has
# len(Y) x len(X) nodes rather than a regular, evenly spaced axis — confirm
# this is intended.
xi,yi= np.meshgrid(X,Y)
# Nearest-neighbour interpolation of Z onto the grid nodes.
zi = griddata((X,Y),Z,(xi,yi),method='nearest')
plt.contourf(xi,yi,zi,extend='both',vmin=dt_bar[0],vmax=dt_bar[-1],
cmap=cm.terrain)

Normalise with countplot()

The code below shows a graph with the numbers of values in my list:
# Draw one bar per distinct value showing how many times it occurs.
# NOTE(review): `plt` is used below but matplotlib.pyplot is not imported in
# this snippet.
import seaborn as sns
sns.countplot([0,1,2,3,1,2,1,3,2,1,2,1,3])
plt.show()
I would like the same plot with percentages instead. Is there an easy option with seaborn or matplotlib?
As shown here, a countplot that shows normalized values can easily be achieved using a seaborn barplot:
# Plot the share of each distinct value as a percentage, using barplot with
# a custom estimator.
import matplotlib.pyplot as plt
import seaborn as sns
x = [0,1,2,3,1,2,1,3,2,1,2,1,3]
# Size of the group passed in, divided by the total number of observations
# in x, expressed as a percentage.
percentage = lambda i: len(i) / float(len(x)) * 100
# x is passed as both the grouping variable and the values, so the estimator
# is applied to the observations of each distinct value.
ax = sns.barplot(x=x, y=x, estimator=percentage)
ax.set(ylabel="Percent")
plt.show()
Or, using pandas,
# Same percentage plot, computed with pandas.
import matplotlib.pyplot as plt
import pandas as pd
x = [0,1,2,3,1,2,1,3,2,1,2,1,3]
# normalize=True yields relative frequencies, which are scaled by 100 to get
# percentages; sort=False disables sorting the result by frequency.
ax = (pd.Series(x).value_counts(normalize=True, sort=False)*100).plot.bar()
ax.set(ylabel="Percent")
plt.show()

Changing the size of the heatmap specifically in a seaborn clustermap?

I'm making a clustered heatmap in seaborn as follows
# Draw a clustered heat map of a 100 x 10 matrix of random normal values.
import numpy as np
import seaborn as sns
# Fixed seed so the random data is the same on every run.
np.random.seed(2)
data = np.random.randn(100, 10)
sns.clustermap(data)
But the rows are squished:
and if I pass a size to the clustermap function, it looks terrible.
Is there a way to increase the size of only the heatmap part, so that the row names can be read without stretching out the cluster portions?
As @mwaskom commented, I was able to use ax_heatmap.set_position along with the get_position function to achieve the correct result.
# Draw a clustermap, then resize its heatmap and column-dendrogram axes by
# editing their positions directly.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
np.random.seed(2)
data = np.random.randn(100, 10)
# Keep the object returned by clustermap so its individual axes can be
# reached (ax_heatmap, ax_col_dendrogram).
cm = sns.clustermap(data)
# Current position of the heatmap axes.
hm = cm.ax_heatmap.get_position()
# Use a small font for the row tick labels.
plt.setp(cm.ax_heatmap.yaxis.get_majorticklabels(), fontsize=6)
# Same origin and height, but only 25% of the original width.
cm.ax_heatmap.set_position([hm.x0, hm.y0, hm.width*0.25, hm.height])
col = cm.ax_col_dendrogram.get_position()
# Shrink the column dendrogram to 25% of its width, matching the heatmap,
# and to 50% of its height.
cm.ax_col_dendrogram.set_position([col.x0, col.y0, col.width*0.25, col.height*0.5])
This can be done by passing the dendrogram ratio as a keyword argument:
# Draw the clustermap on a tall figure and give the dendrograms only a small
# share of it through the dendrogram_ratio argument.
import numpy as np
import seaborn as sns
np.random.seed(2)
data = np.random.randn(100, 10)
# NOTE(review): dendrogram_ratio is presumably the fraction of the figure
# size reserved for the dendrograms — confirm against the seaborn docs.
sns.clustermap(data,figsize=(12,30),dendrogram_ratio=0.02,cmap='RdBu')

Categories

Resources