Use SpatialDE to evaluate my anndata.h5ad but no result - python

enter image description here
enter image description here
import SpatialDE
import numpy as np
import anndata
import scanpy as sc
import cv2
from sklearn.metrics import silhouette_score
from sklearn import metrics
import scanpy as sc
import pandas as pd
from decimal import Decimal
from decimal import Decimal,ROUND_HALF_UP
# Path to the normalized mouse-brain AnnData file that is evaluated below.
mousebrain_after_normalization_path='/home/user/imputation_test_data/MouseBrain_D1_normalization.h5ad'
adata_before = anndata.read_h5ad(mousebrain_after_normalization_path)
# Expression table: one row per observation (obs_names), one column per variable (var_names).
# NOTE(review): assumes adata_before.X is a dense array; if it is stored as a sparse matrix it presumably has to be densified first -- TODO confirm.
counts_before = pd.DataFrame(adata_before.X, columns=adata_before.var_names, index=adata_before.obs_names)
# Coordinates from obsm['spatial'], labelled 'x' and 'y' and indexed like the expression table.
coord_before = pd.DataFrame(adata_before.obsm['spatial'], columns=['x', 'y'], index=adata_before.obs_names)
# Run SpatialDE on the coordinates and the expression table.
result_before = SpatialDE.run(coord_before, counts_before)
# NOTE(review): the result is presumably one row per gene, while obsm entries are aligned to observations; adata_before.uns may be the right place for it -- TODO confirm.
adata_before.obsm['spatialDE_result'] = result_before
I tried to use SpatialDE to evaluate my AnnData object on a compute server. I converted adata.X into counts_before (a DataFrame) and adata.obsm['spatial'] into coord_before, and then called SpatialDE.run on them, but I got no result.

Related

Python: how can I read data from files and assign it to an array? error":could not broadcast input array from shape"

I have the following MATLAB code that works fine using text data files, now I am trying to rewrite it using Python but running into errors. I have results that I am trying to apply some calculations on (perform data analysis). My results are in the format of binary files and I have a specific package I am using to help me import the data. For example, here ne is a 1024x256 array with 159 number of files printed per each iteration. So, in MATLAB I can simply do the following:
% Load data:
frame = 6; % index of the output file to load (used to build the file name below)
% Two text files are read from DirPath: 'ne_unpert.txt' and 'ne_<frame>.txt'.
ne_bg = load([DirPath '/ne_unpert.txt']);
ne_p = load([DirPath '/ne_' num2str(frame) '.txt']);
% perform calculations on data:
% Sum of the two arrays, then the element-wise ratio of ne_p to that sum.
ne = ne_bg + ne_p;
dn_over_n = ne_p ./ ne;
Since MATLAB deals easily with multi-dimensional arrays and matrices, I am struggling to interpret that to python.
My Python code:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.gridspec as gridspec
import matplotlib.colors as colors
import matplotlib.patches as patches
import scipy.optimize as opt
from scipy.special import erf, comb, gamma, gammainc
import scipy.constants as const
from scipy.integrate import odeint
import sys
from glob import glob
from mpl_toolkits.axes_grid1 import make_axes_locatable
import Package as pg
# Initialize sizes
ne = np.zeros((1024,256))
ne_p = np.zeros((1024,256))
# Data
# NOTE: ne_p is rebound below to whatever getValues() returns, so the zeros array created above is discarded.
data = pg.GData('ne_p.bp')
dg = pg.GInterpModal(data, 2, 'ms')
#dg.interpolate(overwrite=True)
ne_p = data.getValues()
data = pg.GData('ne0.gkyl')
dg = pg.GInterpModal(data, 2, 'ms')
#dg.interpolate(overwrite=True)
ne_bg = data.getValues()
# NOTE(review): range(1,159) stops at 158; range(1,160) would be needed to include file 159.
for i in range(1,159): # would like to look at files start from 1 to 159 not 0
data = pg.GData('ne{:d}.gkyl'.format(i))
dg = pg.GInterpModal(data, 2, 'ms')
# ne[i,:] selects one row of 256 values, but per the traceback getValues() returns shape (1024,256,1); that mismatch is the broadcast error.
ne[i,:] = data.getValues() # ERROR HERE
dn_over_n = ne_p/ne # get
....
Error message:
ValueError Traceback (most recent call last)
<ipython-input-35-d6134fb807e8> in <module>
48 dg = pg.GInterpModal(data, 2, 'ms')
49 #dg.interpolate(overwrite=True)
---> 50 ne[i,:] = data.getValues()
ValueError: could not broadcast input array from shape (1024,256,1) into shape (256)
Can someone show me how to fix this and explain what it means?

'numpy.timedelta64' object is not iterable

import numpy as np
# Starting time taken from the selected dataset.
base=dsloc.time.values
# Adds each element of dsloc.step.values to base; the comprehension iterates over that value, so it fails when it is a single numpy.timedelta64 rather than an array.
time=np.array([base+np.timedelta64(step) for step in dsloc.step.values])
I was trying to use timeseries which is https://github.com/enyfeo/efas/blob/master/work/5_Timeseries.ipynb
I got the following error in the lines I specified; TypeError: 'numpy.timedelta64' object is not iterable
Can you help me? thanks...
Edit;
import pandas as pd
import xarray as xr
import numpy as np
from random import sample
#%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plot
# Register pandas' converters with matplotlib.
pd.plotting.register_matplotlib_converters()
# Load the station table from the Excel workbook.
stations = pd.read_excel('C:/Users/90531/Desktop/Lisflood/KONYA_LONG_LAT_4digit.xlsx')
#station = stations.sample(n=1) # We can randomly choose a station
# Keep only the rows whose 'stname' column equals 300.
station=stations[stations['stname'] == 300 ] # We have chosen a station for consistency
# Bare expression: shows the selection when it is the last line of a notebook cell.
station
That works for me:
import pandas as pd
from random import sample
# Load the station table and keep the rows whose 'stname' equals 300.
stations = pd.read_excel('KONYA_LONG_LAT_4digit.xlsx')
#station = stations.sample(n=1) # We can randomly choose a station
station=stations[stations['stname'] == 300 ] # We have chosen a station for consistency
import xarray as xr
ds = xr.open_dataset('snow.nc')
# extract data for selected point in netcdf file by LISFLOOD coordinates
# NOTE(review): x is fed from the 'lat' column and y from the 'long' column -- confirm this matches the dataset's axis convention.
dsloc = ds.sel(x=station.lat.values,y=station.long.values,method='nearest')
import numpy as np
base=dsloc.time.values
# One timestamp per element of dsloc.step.values, each offset from base.
time=np.array([base+np.timedelta64(step) for step in dsloc.step.values])
print(time)
output:
['2019-04-01T00:00:00.000000000' '2019-04-01T06:00:00.000000000'
'2019-04-01T12:00:00.000000000' '2019-04-01T18:00:00.000000000'
'2019-04-02T00:00:00.000000000' '2019-04-02T06:00:00.000000000'
'2019-04-02T12:00:00.000000000' '2019-04-02T18:00:00.000000000'
'2019-04-03T00:00:00.000000000' '2019-04-03T06:00:00.000000000'
'2019-04-03T12:00:00.000000000' '2019-04-03T18:00:00.000000000'
'2019-04-04T00:00:00.000000000' '2019-04-04T06:00:00.000000000'
'2019-04-04T12:00:00.000000000' '2019-04-04T18:00:00.000000000'
'2019-04-05T00:00:00.000000000' '2019-04-05T06:00:00.000000000'
'2019-04-05T12:00:00.000000000' '2019-04-05T18:00:00.000000000'
'2019-04-06T00:00:00.000000000' '2019-04-06T06:00:00.000000000'
'2019-04-06T12:00:00.000000000' '2019-04-06T18:00:00.000000000'
'2019-04-07T00:00:00.000000000' '2019-04-07T06:00:00.000000000'
'2019-04-07T12:00:00.000000000' '2019-04-07T18:00:00.000000000'
'2019-04-08T00:00:00.000000000' '2019-04-08T06:00:00.000000000'
'2019-04-08T12:00:00.000000000' '2019-04-08T18:00:00.000000000'
'2019-04-09T00:00:00.000000000' '2019-04-09T06:00:00.000000000'
'2019-04-09T12:00:00.000000000' '2019-04-09T18:00:00.000000000'
'2019-04-10T00:00:00.000000000' '2019-04-10T06:00:00.000000000'
'2019-04-10T12:00:00.000000000' '2019-04-10T18:00:00.000000000'
'2019-04-11T00:00:00.000000000']
So where is the problem? In your NetCDF file, dsloc.step.values is a single value rather than a vector, so there is nothing to iterate over; only a vector can be iterated.
How to fix it? Just check the type of steps. Try this:
import pandas as pd
from random import sample
import numpy as np
import xarray as xr

# Load the station table and pick one station.
stations = pd.read_excel('KONYA_LONG_LAT_4digit.xlsx')
# station = stations.sample(n=1)  # We can randomly choose a station
station = stations[stations['stname'] == 300]  # We have chosen a station for consistency

# ds = xr.open_dataset('snow.nc')
ds = xr.open_dataset('adaptor.efas_mars.external-1615983508.657324-23066-19-648c63b0-a6b0-4568-8970-d0f966ff16a2.nc')
# extract data for selected point in netcdf file by LISFLOOD coordinates
dsloc = ds.sel(x=station.lat.values, y=station.long.values, method='nearest')

base = dsloc.time.values
steps = dsloc.step.values
# When the file holds a single step, `steps` is a bare numpy.timedelta64 and
# cannot be iterated; wrap that one value in a list instead of looping over it.
if isinstance(steps, np.timedelta64):
    time = np.array([base + np.timedelta64(steps)])
else:
    time = np.array([base + np.timedelta64(step) for step in steps])
print(time)

dataset is not callable problems

I'm trying to impute NaN values, but first I want to find the best method for calculating them. I'm new to these methods, so I want to use some code I found to compare the different regressors and choose the best one. The original code is this:
from sklearn.experimental import enable_iterative_imputer # noqa
from sklearn.datasets import fetch_california_housing
from sklearn.impute import SimpleImputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
# Number of cross-validation splits used by the example.
N_SPLITS = 5
# Fixed seed so the example is reproducible.
rng = np.random.RandomState(0)
# return_X_y=True makes the loader return the (features, target) pair directly.
X_full, y_full = fetch_california_housing(return_X_y=True)
# ~2k samples is enough for the purpose of the example.
# Remove the following two lines for a slower run with different error bars.
X_full = X_full[::10]
y_full = y_full[::10]
n_samples, n_features = X_full.shape
Here, fetch_california_housing is the dataset used by the original example.
When I tried to adapt this code to my case, I wrote the following:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from numpy import genfromtxt
# Parse the CSV file into an array.
data = genfromtxt('documents/datasets/df.csv', delimiter=',')
# First two columns are used as features, the third column as the target.
features = data[:, :2]
targets = data[:, 2]
N_SPLITS = 5
rng = np.random.RandomState(0)
# NOTE(review): `data` is the array returned by genfromtxt, not a loader function, so calling it raises the "not callable" error; `features` and `targets` above presumably already hold the intended X / y split.
X_full, y_full = data(return_X_y= True)
# ~2k samples is enough for the purpose of the example.
# Remove the following two lines for a slower run with different error bars.
X_full = X_full[::10]
y_full = y_full[::10]
n_samples, n_features = X_full.shape
I always get the same error:
AttributeError: 'numpy.ndarray' object is not callable
and before I used my DF as csv (df.csv) the error is the same
AttributeError: 'Dataset' object is not callable
the complete error is this:
TypeError Traceback (most recent call last) <ipython-input-8-3b63ca34361e> in <module>
3 rng = np.random.RandomState(0) 4
----> 5 X_full, y_full = df(return_X_y=True)
6 # ~2k samples is enough for the purpose of the example.
7 # Remove the following two lines for a slower run with different error bars.
TypeError: 'DataFrame' object is not callable
and I don't know how to make either of these errors go away.
I hope I have explained my problem well; my English is not very good.

AttributeError: module 'scipy.stats' has no attribute 'signaltonoise'

I'm using SciPy's signaltonoise function; the code is below, but it raises an error. I searched for this on GitHub too but couldn't find anything. Can you please help?
import numpy as np
import cv2
import math
import os
import csv
from scipy import stats
from PIL import Image
from skimage.color import rgb2gray
from multiprocessing import Pool
from skimage.feature import local_binary_pattern # Local Binary Pattern function
from scipy.stats import itemfreq # To calculate a normalized histogram
import scipy.stats as sp
from skimage.feature import hog
from scipy.ndimage.measurements import label
from scipy import signal as sg
# Wrapper that asks scipy.stats for the signal-to-noise ratio of `img` with axis=None.
def calc_snr(img):
# NOTE(review): this call is what raises the reported AttributeError; the installed scipy.stats has no `signaltonoise` attribute.
snr = stats.signaltonoise(img, axis=None)
return snr
# NOTE(review): `img` is not defined in this snippet; presumably it is loaded elsewhere.
snr = calc_snr(img)
scipy.stats.signaltonoise() was deprecated in scipy 0.16.0 and removed in 1.0.0. If you need to use the function without downgrading scipy, you can see the original code from the function before it was removed on github here, and reproduced below:
import numpy as np
def signaltonoise(a, axis=0, ddof=0):
    """Return the signal-to-noise ratio of *a*: the mean divided by the standard deviation.

    Drop-in replacement for the ``scipy.stats.signaltonoise`` function that
    was removed from SciPy.

    Parameters
    ----------
    a : array_like
        Input data; converted with ``np.asanyarray``.
    axis : int or None, optional
        Axis along which the mean and standard deviation are computed.
        ``None`` computes them over the flattened array. Default is 0.
    ddof : int, optional
        Degrees-of-freedom correction passed to ``std``. Default is 0.

    Returns
    -------
    numpy.ndarray
        ``mean / std`` along ``axis``, with 0 wherever the standard
        deviation is 0.
    """
    a = np.asanyarray(a)
    m = a.mean(axis)
    sd = a.std(axis=axis, ddof=ddof)
    # The division is evaluated for every element before np.where masks the
    # zero-deviation entries, so silence the warning those entries would emit.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio = m / sd
    return np.where(sd == 0, 0, ratio)

Cross validation in random forest using anaconda

I'm using the titanic data set to predict if a passenger survived or not using random forest. This is my code:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
import matplotlib.pyplot as plt
%matplotlib inline
data=pd.read_csv("C:\\Users\\kabala\\Downloads\\Titanic.csv")
# The result of this check is not assigned; it does not modify `data`.
data.isnull().any()
# NOTE(review): `data1` is not defined in this snippet; presumably `data` was intended.
data["Age"]=data1["Age"].fillna(data1["Age"].median())
data["PClass"]=data["PClass"].fillna("3rd")
data["PClass"].isnull().any()
data1.isnull().any()
# The return value is not assigned, so `data` still holds the original string 'Sex' column.
pd.get_dummies(data.Sex)
# choosing the predictive variables
x=data[["PClass","Age","Sex"]]
# the target variable is y
y=data["Survived"]
modelrandom=RandomForestClassifier(max_depth=3)
# Rebinding modelrandom replaces the classifier with the value returned by cross_val_score.
modelrandom=cross_validation.cross_val_score(modelrandom,x,y,cv=5)
But, I keep on getting this error:
ValueError: could not convert string to float: 'female'
and I don't understand what is the problem because I changed the Sex feature to a dummy
Thanks:)
pd.get_dummies returns a new data frame and does not perform the operation in place. Therefore you really are passing a string in the Sex column.
So you would need something like X = pd.get_dummies(data[['Sex','PClass','Age']], columns=['Sex','PClass']), and this should fix your problem. I think PClass is also a string column that needs dummy variables, since you are filling it with '3rd'.
There are still some more places where you call data.isnull().any() that is not doing anything to the underlying dataframe. I left those as they were, but just FYI they may not be doing what you intended.
Full code would be:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
import matplotlib.pyplot as plt
%matplotlib inline
data = pd.read_csv("C:\\Users\\kabala\\Downloads\\Titanic.csv")
data.isnull().any()  # Beware: this is not doing anything to the data
# Fill missing ages with the median age (the undefined `data1` is replaced by `data`).
data["Age"] = data["Age"].fillna(data["Age"].median())
data["PClass"] = data["PClass"].fillna("3rd")
data["PClass"].isnull().any()  # Beware: this is not doing anything to the data
data.isnull().any()  # Beware: this is not doing anything to the data
# ********Fix for your code*******
# One-hot encode the string columns so the classifier only receives numbers.
X = pd.get_dummies(data[['Sex', 'PClass', 'Age']], columns=['Sex', 'PClass'])
# choosing the predictive variables
# x=data[["PClass","Age","Sex"]]
# the target variable is y
y = data["Survived"]
modelrandom = RandomForestClassifier(max_depth=3)
# Pass the encoded frame X (not the old lowercase x) to the cross-validation.
# NOTE: scikit-learn 0.20 removed sklearn.cross_validation; on newer versions
# import cross_val_score from sklearn.model_selection instead.
modelrandom = cross_validation.cross_val_score(modelrandom, X, y, cv=5)

Categories

Resources