I create a mask of my dataset to plot only the No Animals materials, and when I plot the masked data I have a problem with the legend: only the first material is shown, and I don't know how to add the other two materials.
import numpy as np
import umap
import umap.plot
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display,HTML
import cufflinks as cf
cf.set_config_file(sharing='public',theme='ggplot',offline=True)
import seaborn as sns
# Load the feature matrix from the .npz archive. allow_pickle=True permits
# unpickling of object arrays, so this should only be used on trusted files.
palpations = np.load('big_matrix_16384.npz',allow_pickle=True)
X = palpations['arr_0']
# Fit a UMAP model on X (correlation metric), then project X through it.
embedding = umap.UMAP(n_neighbors=50,
min_dist=0.2,
metric='correlation').fit(X)
emb = embedding.transform(X)
# NOTE(review): `Data` is not defined anywhere in this snippet; presumably a
# DataFrame with one row per row of X and a "Tipo" column -- confirm.
mask_1 = Data["Tipo"]=="Animal"
# Keep only the embedded points whose row is flagged by the mask.
emb_tipo_1 = emb[mask_1]
# `cmap` is assigned here but not used by the visible code below.
cmap = plt.cm.Spectral
# Map each material name to one colour of the seaborn "Set2" palette.
# NOTE(review): `data_tipo_1` is not defined in this snippet; presumably it is
# Data[mask_1] -- confirm.
c =[sns.color_palette("Set2")[x] for x in data_tipo_1.Material.map({"bone":0, "cartilage":1, "liver_raw_piece1":2})]
# All points are drawn by ONE scatter() call and `label` receives the whole
# array of unique material names.
# NOTE(review): a single call yields a single legend entry, which matches the
# symptom described in the question; one scatter() call per material (each
# with its own label) would presumably give one legend entry per material.
plt.scatter(emb_tipo_1[:,0],
emb_tipo_1[:,1],
c=c,
label=np.unique(data_tipo_1.Material),s=10)
plt.gca().set_aspect("equal","datalim")
plt.title("UMAP muestras Animales.")
plt.legend()
enter image description here
Related
I would like to draw a line plot and add a different overlay to each section based on a condition, as illustrated below.
May I know how to do this? If possible, please point me to the right material for achieving this.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Seed both random sources so the example is reproducible.
np.random.seed(0)
rng = np.random.default_rng(2)

# Build four consecutive 4-row segments; all rows of a segment share one
# integer "type" drawn from [0, 6).
mlist = []
for _ in range(4):
    m = np.random.rand(4).tolist()
    n = rng.integers(0, 6, size=(1)).tolist() * 4
    df = pd.DataFrame(zip(m, n), columns=['yval', 'type'])
    mlist.append(df)

# The second reset_index() exposes the running row number as an "index"
# column, which is used as the x axis.
df = pd.concat(mlist).reset_index(drop=True).reset_index()
sns.lineplot(data=df, x="index", y="yval")
plt.show()
Suggestions using Matplotlib, Seaborn, or any other package are welcome.
The filling of the section was achieved using axvspan. I also used text for annotations.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Seed both random sources so the example is reproducible.
np.random.seed(0)
rng = np.random.default_rng(2)

# Build four consecutive 4-row segments; all rows of a segment share one
# integer "type" drawn from [0, 6).
mlist = []
for _ in range(4):
    m = np.random.rand(4).tolist()
    n = rng.integers(0, 6, size=(1)).tolist() * 4
    df = pd.DataFrame(zip(m, n), columns=['yval', 'type'])
    mlist.append(df)

# The second reset_index() exposes the running row number as an "index"
# column, which is used as the x axis.
df = pd.concat(mlist).reset_index(drop=True).reset_index()
g = sns.lineplot(data=df, x="index", y="yval")

# Background colour per segment type. Types that are not listed here map to
# None via dict.get().
overlay = {0: 'm', 1: 'gray', 5: 'r'}

# Walk the frame one 4-row segment at a time, shading and labelling each one.
for i in np.arange(0, len(df), 4):
    tmp = df.iloc[i:i + 4, :]
    seg_type = tmp.type.unique()[0]
    v = overlay.get(seg_type)
    g.axvspan(min(tmp.index), max(tmp.index) + 1, color=v, alpha=0.3)
    g.text(((min(tmp.index) + max(tmp.index) + 1) / 2) - 1, 0.1,
           'type {}'.format(seg_type), fontsize=12)
plt.show()
Using Matplotlib add_patch and text
import numpy as np  # required by np.random.seed / np.random.rand below
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.colors as mcolors

# nn=mcolors.CSS4_COLORS
all_colors = list(mcolors.TABLEAU_COLORS.keys())

# Seed both random sources so the example is reproducible.
np.random.seed(0)
rng = np.random.default_rng(2)

# Build four consecutive 4-row segments; all rows of a segment share one
# integer "type" drawn from [0, 6).
mlist = []
for _ in range(4):
    m = np.random.rand(4).tolist()
    n = rng.integers(0, 6, size=(1)).tolist() * 4
    df = pd.DataFrame(zip(m, n), columns=['yval', 'type'])
    mlist.append(df)

# The second reset_index() exposes the running row number as an "index" column.
df = pd.concat(mlist).reset_index(drop=True).reset_index()
# df.to_feather('test.feather')
# df=pd.read_feather('test.feather')

# A non-zero difference to the previous row marks the first row of a new run
# of equal types. The very first row has no predecessor (NaN), so it is given
# a non-zero sentinel to be counted as a run start as well.
df['C'] = df['type'].diff()
df['C'] = df['C'].fillna(10)
nb = df.type[(df['C'] != 0)].to_frame().reset_index()

# One text label and one Tableau colour per distinct type value.
unique_val = nb['type'].drop_duplicates().sort_values().tolist()
ngroup_type = dict(zip(unique_val, [f'type {idx}' for idx in unique_val]))
nb['ngroup'] = nb["type"].map(ngroup_type)
color_group = all_colors[:len(unique_val)]
res = dict(zip(unique_val, color_group))
nb["color"] = nb["type"].map(res)

starting_point = nb["index"].values.tolist()
mcolor = nb["color"].values.tolist()
group_type = nb["ngroup"].values.tolist()

# Every rectangle is drawn with a fixed width and height in data coordinates.
nspace = 4
nheight = 1
fg = sns.lineplot(data=df, x="index", y="yval")
for ncolor, spoint, gtype in zip(mcolor, starting_point, group_type):
    fg.axes.add_patch(patches.Rectangle((spoint, 0),
                                        nspace, nheight, edgecolor='blue',
                                        facecolor=ncolor, fill=True, alpha=0.1, ls=':'))
    fg.axes.text(spoint + 1.5, 0.1, gtype, size=10,
                 va="baseline", ha="left", multialignment="left")
plt.show()
Code Output Image
Desired Image
[
My CSV data consists of X-axis values, Y-axis values and hardness values, and I want to plot a smooth heat map rather than one made of discrete boxes.
DATA:
import cv2
from skimage.io import imread, imshow
from skimage.transform import resize
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
import os
from tqdm import tqdm
import pandas as pd
import seaborn as sns
from sklearn.neighbors import KernelDensity
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import style
from astropy.convolution import convolve, Gaussian2DKernel
from scipy.ndimage.filters import gaussian_filter
path = r"C:\Users\patels\Desktop\Ti/"
ids = os.listdir(path)
#print(ids)

# Draw one hardness heat map per file found in `path`.
for n, id_ in tqdm(enumerate(ids), total=len(ids)):
    data = pd.read_excel(path+id_)
    print(path+id_)
    df1 = data[['HV 0.2', 'X pos. [mm]', 'Y pos. [mm]']]
    # Pivot to a grid: rows are Y positions, columns are X positions,
    # cells are the 'HV 0.2' values.
    heatmap1_data = pd.pivot_table(df1, values='HV 0.2', index=['Y pos. [mm]'], columns='X pos. [mm]')
    plt.figure() #this creates a new figure on which your plot will appear
    heatmap1 = sns.heatmap(heatmap1_data, cmap="viridis", vmin=300, vmax=400)
    plt.title(ids[n]+'Ti Hardness Map')
I have written a simple K-means algorithm, but I am finding it difficult to explore it cluster by cluster.
Github Link: https://github.com/AkshayBayas/Machine-learning-/blob/master/K-Means%20algorithm.ipynb
Code:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%pylab
# Load the data set that will be clustered.
Df = pd.read_csv('Kdata.csv')
from sklearn.cluster import KMeans
# KMeans() is created without arguments, so the library's default settings
# (including the default number of clusters) are used.
KModule = KMeans()
# Fit on the whole frame, then predict a cluster label for the same rows.
K_model = KModule.fit(Df)
K_result = K_model.predict(Df)
centers = K_model.cluster_centers_
# Bare expression: has no effect in a script (presumably used to display the
# labels in a notebook cell).
K_model.labels_
# NOTE(review): `x1` and `x2` are not defined in this snippet; presumably two
# columns of Df defined elsewhere in the notebook -- confirm.
plt.scatter (x1,x2, c = K_model.labels_, cmap = 'rainbow' )
Can anyone help?
No idea what you mean by "explore cluster by cluster".
If you don't specify the number of clusters, the default is 8; if you start with 3, as in the code below, you can separate them. You also need to make the cluster column categorical so that it is not colored on a continuous scale:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Load the data set that will be clustered.
Df = pd.read_csv('Kdata.csv')
from sklearn.cluster import KMeans

# Ask for three clusters explicitly instead of relying on the default.
KModule = KMeans(n_clusters=3)
K_model = KModule.fit(Df)
K_result = K_model.predict(Df)

# Store the labels as a categorical column so they are coloured as discrete
# groups rather than on a continuous scale.
Df['cluster'] = pd.Categorical(K_model.labels_)

# x and y must be passed by keyword (positional x/y is rejected by current
# seaborn releases); the hue colours are selected with `palette`, not `cmap`.
sns.scatterplot(data=Df, x="V1", y="V2", hue='cluster', palette='rainbow')
Df.plot.scatter("V1", "V2", c='cluster', cmap='rainbow')
I'm making a clustered heatmap in seaborn as follows
import numpy as np
import seaborn as sns
# Fixed seed so the random 100x10 matrix (and thus the clustering) is repeatable.
np.random.seed(2)
n_rows, n_cols = 100, 10
data = np.random.randn(n_rows, n_cols)
sns.clustermap(data=data)
but the rows are squished:
but if I pass a size to the clustermap function then it looks terrible
is there a way to only increase the size of the heatmap part? So that the row names can be read, but not stretch out the cluster portions.
As @mwaskom commented, I was able to use ax_heatmap.set_position together with get_position to achieve the correct result.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Fixed seed so the random 100x10 matrix (and thus the clustering) is repeatable.
np.random.seed(2)
data = np.random.randn(100, 10)
cm = sns.clustermap(data)
heatmap_ax = cm.ax_heatmap
col_dendro_ax = cm.ax_col_dendrogram

# Shrink the row tick labels, then narrow the heatmap to a quarter of its
# original width while keeping its origin and height.
hm = heatmap_ax.get_position()
plt.setp(heatmap_ax.yaxis.get_majorticklabels(), fontsize=6)
heatmap_ax.set_position([hm.x0, hm.y0, hm.width * 0.25, hm.height])

# Give the column dendrogram the same width factor and halve its height.
col = col_dendro_ax.get_position()
col_dendro_ax.set_position([col.x0, col.y0, col.width * 0.25, col.height * 0.5])
This can be done by passing the value of the dendrogram ratio in the kw arguments
import numpy as np
import seaborn as sns
# Fixed seed so the random 100x10 matrix (and thus the clustering) is repeatable.
np.random.seed(2)
data = np.random.randn(100, 10)

# Tall figure with a small dendrogram_ratio, passed as keyword options.
clustermap_options = {
    'figsize': (12, 30),
    'dendrogram_ratio': 0.02,
    'cmap': 'RdBu',
}
sns.clustermap(data, **clustermap_options)
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
import matplotlib as mpl
import seaborn as sns
from scipy.stats import gaussian_kde
from numpy import linspace,hstack
LINE_WIDTH = 3
filename = ('')

# Parse whitespace-separated numeric columns, skipping blank lines. Each row
# is built as a concrete list of floats (a bare map() object would not be
# converted by np.array on Python 3), and the file is closed by the `with`.
with open(filename, 'r') as fh:
    data = [[float(tok) for tok in line.split()] for line in fh if line.strip()]
dataM = np.array(data)

# Per-column mean and standard deviation.
meandata = np.mean(dataM, axis=0)
SD = np.std(dataM, axis=0)

sns.set_palette("hls")
mpl.rc("figure", figsize=(8, 4))

# Grid spanning mean +/- 4 standard deviations of the first column, with one
# point per sample.
xs = np.linspace(meandata[0] - (4 * SD[0]), meandata[0] + (4 * SD[0]), dataM[:, 0].size)
ys = dataM[:, 0]
n, bins, patches = plt.hist(ys, 15)
I get this plot.
I want to plot a Gaussian kernel density estimate over my histogram, but I get the error TypeError: 'module' object is not callable
when I try to do this:
# Build a Gaussian kernel density estimate from the samples in `ys`.
my_pdf = gaussian_kde(ys)
# Evaluation grid: 1000 points between 30 and 100.
x = linspace(30,100,1000)
# `plt` is the matplotlib.pyplot MODULE, and this line calls it like a
# function -- this is the call that raises the reported
# "TypeError: 'module' object is not callable".
# NOTE(review): presumably plt.plot(x, my_pdf(x), 'r') was intended.
plt(x,my_pdf(x),'r') # distribution function
# NOTE(review): `normed` was replaced by `density` in newer Matplotlib
# releases -- confirm against the installed version.
plt.hist(ys,normed=1,alpha=.3) # histogram
plt.show()
What am I doing wrong?
You can do this directly using seaborn. It would be something like this:
import pandas as pd
import seaborn as sns
import scipy.stats
import matplotlib.pyplot as plt
# Read the samples, then draw their histogram with a fitted normal curve
# (fit=scipy.stats.norm) and with the KDE curve switched off.
data = pd.read_csv('input.txt')
sns.distplot(data, fit=scipy.stats.norm, kde=False)
plt.show()
For a kde plot just do:
# Called with its defaults; `data` comes from the previous snippet.
# NOTE(review): distplot is deprecated in recent seaborn releases -- confirm
# the replacement (histplot / displot) for the installed version.
sns.distplot(data);