How to use CSV in mne.io.read_raw_fif()? - python

I have a CSV file of EEG signals. I want to use this file with the mne package,
so I tried this code in Colab:
import numpy as np
import mne
from mne.channels import make_standard_montage
from numpy import genfromtxt
my_data = genfromtxt('dataset.csv', delimiter=',',dtype=None)
# Some information about the channels
ch_names = ['CH 1', 'CH 2', 'CH 3'] # TODO: finish this list
# Sampling rate of the Nautilus machine
sfreq = 5000 # Hz
# Create the info structure needed by MNE
info = mne.create_info(ch_names, sfreq)
# Finally, create the Raw object
raw = mne.io.Raw(my_data, info)
# Plot it!
raw.plot()
This is the error: my_data is a numpy.ndarray, but mne.io.Raw expects a filename ending in
raw.fif, raw.fif.gz, raw_sss.fif, raw_sss.fif.gz, raw_tsss.fif,
raw_tsss.fif.gz, or _meg.fif. If a file-like object is provided,
preloading must be used, as mentioned here.
So my question is: how can I convert a CSV file to one of these formats? Is there any suggestion?
Thanks.
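For what it's worth, no conversion to FIF is actually needed for in-memory data: mne.io.RawArray builds a Raw object directly from a NumPy array plus an info structure. A minimal sketch, assuming the CSV stores one channel per column (hence the transpose, since RawArray expects shape (n_channels, n_samples)):
import numpy as np
import mne

my_data = np.genfromtxt('dataset.csv', delimiter=',')
data = my_data.T  # RawArray expects (n_channels, n_samples)
ch_names = ['CH 1', 'CH 2', 'CH 3']  # one name per channel
sfreq = 5000  # Hz
info = mne.create_info(ch_names, sfreq, ch_types='eeg')
raw = mne.io.RawArray(data, info)
raw.plot()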


Tkinter and EEG forward operator with a template MRI

I'm following this tutorial:
https://mne.tools/dev/auto_tutorials/forward/35_eeg_no_mri.html#sphx-glr-auto-tutorials-forward-35-eeg-no-mri-py
but I can't figure out how to plot the dynamic image in a tkinter frame (Python 3.8).
Besides, when plotting outside tkinter, it changes the resolution and size of the fullscreen tkinter window to a smaller size.
I want to plot mne.viz.plot_alignment... in a tkinter frame.
Any suggestions are welcome.
import os.path as op
import numpy as np
import mne
from mne.datasets import eegbci
from mne.datasets import fetch_fsaverage
# Download fsaverage files
fs_dir = fetch_fsaverage(verbose=True)
subjects_dir = op.dirname(fs_dir)
# The files live in:
subject = 'fsaverage'
trans = 'fsaverage' # MNE has a built-in fsaverage transformation
src = op.join(fs_dir, 'bem', 'fsaverage-ico-5-src.fif')
bem = op.join(fs_dir, 'bem', 'fsaverage-5120-5120-5120-bem-sol.fif')
raw_fname, = eegbci.load_data(subject=1, runs=[6])
raw = mne.io.read_raw_edf(raw_fname, preload=True)
# Clean channel names to be able to use a standard 1005 montage
new_names = dict(
    (ch_name,
     ch_name.rstrip('.').upper().replace('Z', 'z').replace('FP', 'Fp'))
    for ch_name in raw.ch_names)
raw.rename_channels(new_names)
# Read and set the EEG electrode locations
montage = mne.channels.make_standard_montage('standard_1005')
raw.set_montage(montage)
raw.set_eeg_reference(projection=True) # needed for inverse modeling
# Check that the locations of the EEG electrodes are correct with respect to the MRI
mne.viz.plot_alignment(
    raw.info, src=src, eeg=['original', 'projected'], trans=trans,
    show_axes=True, mri_fiducials=True, dig='fiducials')
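Note that mne.viz.plot_alignment renders through a 3D backend (PyVista), not matplotlib, so it does not drop into a Tk widget directly. As a minimal sketch of the general Tk embedding pattern, assuming a 2D matplotlib figure such as raw.plot_sensors is an acceptable stand-in for the 3D alignment view:
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

root = tk.Tk()
fig = raw.plot_sensors(show_names=True, show=False)  # returns a matplotlib Figure
canvas = FigureCanvasTkAgg(fig, master=root)  # standard matplotlib-in-Tk embedding
canvas.draw()
canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
root.mainloop()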

Select dimensions by name from a dask chunk

I have some ensemble files in grib format that I would like to lazy-load in Python using dask and xarray. Based on https://climate-cms.org/2018/09/14/dask-era-interim.html, I managed to lazy-load the files as intended, but now I want to slice and select the dimensions to plot the data for some time and level.
UPDATE: I recently came back to this issue and finally figured out that instead of using da.concatenate, I should use da.stack. This simple change solved my problem. The question is updated accordingly, in case anyone needs an example of how to create an ensemble of grib files in Python (with dask arrays for lazy loading), to load and plot data in the same fashion as one would with software like GrADS.
My program looks like:
import dask
import dask.array as da
import xarray as xr
import pandas as pd
import numpy as np
from glob import glob
from datetime import date, datetime, timedelta
import matplotlib.pyplot as plt
bpath = '/some/path/to/my/data'
# pressure levels
levels =['1000', '925', '850', '700', '500', '300', '250', '200', '50']
# ensemble member names
ensm = ['M01', 'M02', 'M03', 'M04', 'M05']
@dask.delayed
def open_file_delayed(file, vname):
    ds = xr.open_dataset(file, decode_cf=False, engine='pynio')
    return ds

def open_file(file, vname, nlevs, nlats, nlons, ftype):
    file_data = open_file_delayed(file, vname)[vname].data
    return da.from_delayed(file_data, (nlevs, nlats, nlons), ftype)
def open_ensemble(date, member, vname):
    # list of files to open (sorted by date)
    # filename mask: PREFIXMEMYYYYiMMiDDiHHiYYYYfMMfDDfHHf.grb
    # MEM: member name (see the ensm list)
    # YYYYiMMiDDiHHi: analysis date (the date argument)
    # YYYYfMMfDDfHHf: forecast date
    files = sorted(glob(bpath + '/%(dateanl)s/%(mem)s/PREFIX%(mem)s%(dateanl)s*.grb' %
                        {'dateanl': date, 'mem': member}))
    ntime = len(files)
    # open the first file in the list to get dimensions and coordinates
    ds0 = xr.open_dataset(files[0], decode_cf=False, engine='pynio')
    var0 = ds0[vname]
    levs = ds0.lv_ISBL2.data
    lats = ds0.g4_lat_0.data
    lons = ds0.g4_lon_1.data
    nlevs = ds0.lv_ISBL2.size
    nlats = ds0.g4_lat_0.size
    nlons = ds0.g4_lon_1.size
    ftype = var0.dtype
    ds0.close()
    # calculate the date range of the forecasts; in my case len(date_fmt) = 61
    # (every grib file has 61 times and 9 levels)
    date_fmt = pd.date_range(start=datetime.strptime(date, "%Y%m%d%H"), freq="6H", periods=ntime)
    # call 'open_file' for all files contained in the list 'files' and stack them along a new time axis
    dask_var = da.stack([open_file(file, vname, nlevs, nlats, nlons, ftype) for file in files], axis=0)
    # xda is the data array with all files; the stacked array is 4-D (time, lev, lat, lon)
    xda = xr.DataArray(dask_var, dims=['time', 'lev', 'lat', 'lon'])
    # set coordinate values
    xda.coords['time'] = ('time', date_fmt)
    xda.coords['lev'] = ('lev', levs)
    xda.coords['lat'] = ('lat', lats)
    xda.coords['lon'] = ('lon', lons)
    return xda
To use this code, I do (for a single analysis date, 2020053000 = May 30, 2020 at 00h, and a variable called ZGEO):
Note: this part is very fast (it takes milliseconds), as we are just creating a map structure to the actual data, similar to a GrADS control file.
lens_zgeo = [open_ensemble('2020053000', ens, 'ZGEO') for ens in ensm]
dens_zgeo = xr.concat(lens_zgeo, dim='ens')
dens_zgeo.coords['ens'] = ('ens', ensm)
dens_zgeo is a data array with the following structure:
[screenshot of the resulting data array structure]
From this point, I can slice the dimensions of the data array and plot (which is what I originally intended):
Note: this part takes longer because the data needs to be read from the disk.
dens_zgeo.isel(ens=0,time=0,lev=0).plot()
BOOM, case closed. Thanks!
I've edited the question with the modifications I needed in order to get the result I wanted. For this case, the main point is the use of da.stack instead of da.concatenate. By doing so, I got the resulting data array concatenated along the ensemble dimension I needed.
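To see the difference that fixed it, a minimal sketch: da.stack creates the new leading axis that the time (and, via xr.concat, ensemble) dimension needs, while da.concatenate only extends an existing axis:
import numpy as np
import dask.array as da

# two fields with shape (nlevs, nlats, nlons) = (9, 4, 5)
a = da.from_array(np.zeros((9, 4, 5)))
b = da.from_array(np.ones((9, 4, 5)))
print(da.stack([a, b], axis=0).shape)        # (2, 9, 4, 5) - new leading axis
print(da.concatenate([a, b], axis=0).shape)  # (18, 4, 5)   - existing axis extended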

How to georeference an unreferenced aerial image using ground control points in python

I have a series of unreferenced aerial images that I would like to georeference using python. The images are identical spatially (they are actually frames extracted from a video), and I obtained ground control points for them by manually georeferencing one frame in ArcMap. I would like to apply the ground control points I obtained to all the subsequent images, and as a result obtain a geo-tiff or a jpeg file with a corresponding world file (.jgw) for each processed image. I know this is possible to do using arcpy, but I do not have access to arcpy, and would really like to use a free open source module if possible.
My coordinate system is NZGD2000 (epsg 2193), and here is the table of control points I wish to apply to my images:
176.412984, -310.977264, 1681255.524654, 6120217.357425
160.386905, -141.487145, 1681158.424227, 6120406.821253
433.204947, -310.547238, 1681556.948690, 6120335.658359
Here is an example image: https://imgur.com/a/9ThHtOz
I've read a lot of information on GDAL and rasterio, but I don't have any experience with them, and am failing to adapt bits of code I found to my particular situation.
Rasterio attempt:
import cv2
from rasterio.warp import reproject
from rasterio.control import GroundControlPoint
from fiona.crs import from_epsg
img = cv2.imread("Example_image.jpg")
# Creating ground control points (not sure if I got the order of variables right):
points = [GroundControlPoint(176.412984, -310.977264, 1681255.524654, 6120217.357425),
          GroundControlPoint(160.386905, -141.487145, 1681158.424227, 6120406.821253),
          GroundControlPoint(433.204947, -310.547238, 1681556.948690, 6120335.658359)]
# The function requires a parameter "destination", but I'm not sure what to put there.
# I'm guessing this may not be the right function to use
reproject(img, destination, src_transform=None, gcps=points, src_crs=from_epsg(2193),
          src_nodata=None, dst_transform=None, dst_crs=from_epsg(2193), dst_nodata=None,
          src_alpha=0, dst_alpha=0, init_dest_nodata=True, warp_mem_limit=0)
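(Aside: in rasterio, GroundControlPoint takes (row, col, x, y), i.e. image row and column first, which may be part of the trouble above. A hedged sketch that fits an affine transform from the same three points with rasterio.transform.from_gcps, using positive row values as in the accepted answer below, instead of calling reproject:)
from rasterio.control import GroundControlPoint
from rasterio.transform import from_gcps

# GroundControlPoint(row, col, x, y): image row/col first, then map coordinates
gcps = [GroundControlPoint(310.977264, 176.412984, 1681255.524654, 6120217.357425),
        GroundControlPoint(141.487145, 160.386905, 1681158.424227, 6120406.821253),
        GroundControlPoint(310.547238, 433.204947, 1681556.948690, 6120335.658359)]
transform = from_gcps(gcps)  # Affine transform fitted to the GCPs
print(transform)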
GDAL attempt:
from osgeo import gdal
import osr
inputImage = "Example_image.jpg"
outputImage = "image_gdal.jpg"
dataset = gdal.Open(inputImage)
I = dataset.ReadAsArray(0,0,dataset.RasterXSize,dataset.RasterYSize)
outdataset = gdal.GetDriverByName('GTiff')
output_SRS = osr.SpatialReference()
output_SRS.ImportFromEPSG(2193)
outdataset = outdataset.Create(outputImage,dataset.RasterXSize,dataset.RasterYSize,I.shape[0])
for nb_band in range(I.shape[0]):
    outdataset.GetRasterBand(nb_band+1).WriteArray(I[nb_band,:,:])
# Creating ground control points (not sure if I got the order of variables right):
gcp_list = []
gcp_list.append(gdal.GCP(176.412984, -310.977264, 1681255.524654, 6120217.357425))
gcp_list.append(gdal.GCP(160.386905, -141.487145, 1681158.424227, 6120406.821253))
gcp_list.append(gdal.GCP(433.204947, -310.547238, 1681556.948690, 6120335.658359))
outdataset.SetProjection(output_SRS.ExportToWkt())
wkt = outdataset.GetProjection()
outdataset.SetGCPs(gcp_list,wkt)
outdataset = None
I don't quite know how to make the above code work, and I would really appreciate any help with this.
I ended up reading a book "Geoprocessing with Python" and finally found a solution that worked for me. Here is the code I adapted to my problem:
import shutil
from osgeo import gdal, osr
orig_fn = 'image.tif'
output_fn = 'output.tif'
# Create a copy of the original file and save it as the output filename:
shutil.copy(orig_fn, output_fn)
# Open the output file for writing:
ds = gdal.Open(output_fn, gdal.GA_Update)
# Set spatial reference:
sr = osr.SpatialReference()
sr.ImportFromEPSG(2193) #2193 refers to the NZTM2000, but can use any desired projection
# Enter the GCPs
# Format: [map x-coordinate(longitude)], [map y-coordinate (latitude)], [elevation],
# [image column index(x)], [image row index (y)]
gcps = [gdal.GCP(1681255.524654, 6120217.357425, 0, 176.412984, 310.977264),
        gdal.GCP(1681158.424227, 6120406.821253, 0, 160.386905, 141.487145),
        gdal.GCP(1681556.948690, 6120335.658359, 0, 433.204947, 310.547238)]
# Apply the GCPs to the open output file:
ds.SetGCPs(gcps, sr.ExportToWkt())
# Close the output file in order to be able to work with it in other programs:
ds = None
For your gdal method, just using gdal.Warp with the outdataset should work, e.g.
outdataset.SetProjection(output_SRS.ExportToWkt())
wkt = outdataset.GetProjection()
outdataset.SetGCPs(gcp_list,wkt)
gdal.Warp("output_name.tif", outdataset, dstSRS='EPSG:2193', format='GTiff')
This will create a new file, output_name.tif.
As an addition to @Kat's answer, to avoid quality loss of the original image file and to set the nodata value to 0, the following can be used.
#Load the original file
src_ds = gdal.Open(orig_fn)
#Create tmp dataset saved in memory
driver = gdal.GetDriverByName('MEM')
tmp_ds = driver.CreateCopy('', src_ds, strict=0)
#
# ... setting GCP....
#
# Setting no data for all bands
for i in range(1, tmp_ds.RasterCount + 1):
    tmp_ds.GetRasterBand(i).SetNoDataValue(0)
# Saving as file
driver = gdal.GetDriverByName('GTiff')
ds = driver.CreateCopy(output_fn, tmp_ds, strict=0)
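Since the question also asked for a world file (.jgw) per frame, here is a hedged sketch on top of the answers above, assuming the gcps list from the accepted answer: gdal.GCPsToGeoTransform fits an affine geotransform to the GCPs, and the six world-file lines follow from it (the world-file convention references the center of the upper-left pixel, hence the half-pixel shift):
from osgeo import gdal

gt = gdal.GCPsToGeoTransform(gcps)  # affine fit; returns None if the fit fails
with open('frame.jgw', 'w') as f:
    f.write('\n'.join(str(v) for v in [
        gt[1],                          # pixel width
        gt[4],                          # row rotation
        gt[2],                          # column rotation
        gt[5],                          # pixel height (negative)
        gt[0] + gt[1] / 2 + gt[2] / 2,  # x of center of upper-left pixel
        gt[3] + gt[4] / 2 + gt[5] / 2,  # y of center of upper-left pixel
    ]))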

How to use numpy in the Programmable Filter in ParaView

Assume I have a ProgrammableFilter in ParaView, which gets two inputs: mesh1 with data and mesh2 without.
Furthermore, I know the permutation of the points from mesh1 to mesh2.
Inside the filter, I can access the point values through
data0 = inputs[0].GetPointData().GetArray('data')
and obtain a part of the array using
subData = data0[0:6]
for example. But how could I add this subData to the output without a python loop?
To experiment with the code, I created a (not so small) working example:
#!/usr/bin/python
from paraview.simple import *
import numpy as np
import vtk
from vtk.util.numpy_support import numpy_to_vtk
#generate an arbitrary source with data
mesh2=Sphere()
mesh2.Center=[0.0, 0.0, 0.0]
mesh2.EndPhi=360
mesh2.EndTheta=360
mesh2.PhiResolution=100
mesh2.Radius=1.0
mesh2.StartPhi=0.0
mesh2.StartTheta=0.0
mesh2.ThetaResolution=100
mesh2.UpdatePipeline()
#add the data
mesh2Vtk=servermanager.Fetch(mesh2)
nPointsSphere=mesh2Vtk.GetNumberOfPoints()
mesh2Data=paraview.vtk.vtkFloatArray()
mesh2Data.SetNumberOfValues(nPointsSphere)
mesh2Data.SetName("mesh2Data")
#TODO: use numpy here?? do this with a ProgrammableFilter ?
data=np.random.rand(nPointsSphere,1)
for k in range(nPointsSphere):
    mesh2Data.SetValue(k, data[k])
mesh2Vtk.GetPointData().AddArray(mesh2Data)
#send back to paraview server
#from https://public.kitware.com/pipermail/paraview/2011-February/020120.html
t=TrivialProducer()
filter= t.GetClientSideObject()
filter.SetOutput(mesh2Vtk)
t.UpdatePipeline()
w=CreateWriter('Sphere_withData.vtp')
w.UpdatePipeline()
Delete(w)
#create mesh1 without data
mesh1=Line()
mesh1.Point1=[0,0,0]
mesh1.Point2=[0,0,1]
mesh1.Resolution=5
mesh1.UpdatePipeline()
progFilter=ProgrammableFilter(mesh1)
progFilter.Input=[mesh1, t]
progFilter.Script = "curT=inputs[1].GetPointData().GetArray('mesh2Data')"\
    "\nglobIndices=range(0,6)"\
    "\nsubT=curT[globIndices]"\
    "\nswap=vtk.vtkFloatArray()"\
    "\nswap.SetNumberOfValues(len(globIndices))"\
    "\nswap.SetName('T')"\
    "\n#TODO: how can I avoid this loop, i.e. write output.GetPointData().AddArray(convertToVTK(subT))"\
    "\nfor k in range(len(globIndices)):"\
    "\n    swap.SetValue(k,subT[k])"\
    "\noutput.PointData.AddArray(swap)"
progFilter.UpdatePipeline()
w=CreateWriter('Line_withData.vtp')
w.UpdatePipeline()
Delete(w)
I accepted the answer below because it looks right. The following two scripts, however, still show the problem:
base script 'run.py':
src1='file1.vtu'
r1=XMLUnstructuredGridReader(FileName=src1)
progFilter=ProgrammableFilter(r1)
progFilter.Input=[r1]
with open('script.py','r') as myFile:
    progFilter.Script = myFile.read()
progFilter.UpdatePipeline()
progData=progFilter.GetPointDataInformation()
print progData.GetArray('T2').GetRange()
and the script for the programmable filter:
import vtk
import vtk.numpy_interface.dataset_adapter as dsa
import numpy as np
globIndices=inputs[0].GetPointData().GetArray('T')
subT=np.ones((globIndices.shape[0],1))
subTVtk=dsa.VTKArray(subT)
output.PointData.append(subTVtk, 'T2')
With this combination, I get the error messages:
File "/usr/lib/python2.7/dist-packages/vtk/numpy_interface/dataset_adapter.py", line 652, in append
self.VTKObject.AddArray(arr)
TypeError: AddArray argument 1: method requires a VTK object
File "run.py", line 15, in
print progData.GetArray('T2').GetRange()
AttributeError: 'NoneType' object has no attribute 'GetRange'
The first error message seems to be the reason for the second one.
Here's a minimal example that creates a VTK data array from a Numpy array. You should be able to adapt it for your purposes.
import numpy as np
import vtk
from vtk.numpy_interface import dataset_adapter as da
np_arr = np.ones(6)
vtk_arr = da.VTKArray(np_arr)
output.PointData.append(vtk_arr, "my data")
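The question already imports numpy_to_vtk from vtk.util.numpy_support; as a sketch, the same loop-free conversion works with that helper too (deep=1 makes the VTK array copy the NumPy buffer instead of referencing it):
import numpy as np
from vtk.util.numpy_support import numpy_to_vtk

subT = np.random.rand(6).astype(np.float32)  # stand-in data
swap = numpy_to_vtk(subT, deep=1)  # a vtkFloatArray, built without a Python loop
swap.SetName('T')
output.GetPointData().AddArray(swap)  # 'output' exists inside the ProgrammableFilter script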

Pandas and Python image to numpy array [closed]

Closed. This question needs to be more focused. It is not currently accepting answers. Closed 5 years ago.
I'm currently teaching myself pandas and Python for machine learning. I've done fine with text data thus far, but dealing with image data with limited knowledge of Python and pandas is tripping me up.
I have read a .csv file into a pandas DataFrame; one of its columns contains a URL to an image. This is what shows when I get info from the DataFrame:
dataframe = pandas.read_csv("./sample.csv")
dataframe.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 5 columns):
name 5000 non-null object
...
image 5000 non-null object
The image column contains the URL of the image. The problem is, I do not know how to load the image data from the URL and save it as a numpy array for processing.
Any help is appreciated. Thanks in advance!
If you want to download the images from the web and then, for example, rotate the images from your dataframe and save the results, you can use the following code:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
from PIL import Image
import urllib2 as urllib
import io
df = pd.DataFrame({
    "name": ["Butterfly", "Birds"],
    "image": ["https://upload.wikimedia.org/wikipedia/commons/0/0c/Two-tailed_pasha_%28Charaxes_jasius_jasius%29_Greece.jpg",
              "https://upload.wikimedia.org/wikipedia/commons/c/c5/Bat_cave_in_El_Maviri_Sinaloa_-_Mexico.jpg"]})

def rotate_image(image, theta):
    """
    3D rotation matrix around the X-axis by angle theta
    """
    rotation_matrix = np.c_[
        [1, 0, 0],
        [0, np.cos(theta), -np.sin(theta)],
        [0, np.sin(theta), np.cos(theta)]
    ]
    return np.einsum("ijk,lk->ijl", image, rotation_matrix)

for i, imageUrl in enumerate(df.image):
    print imageUrl
    fd = urllib.urlopen(imageUrl)
    image_file = io.BytesIO(fd.read())
    im = Image.open(image_file)
    im_rotated = rotate_image(im, np.pi)
    fig = plt.figure()
    plt.imshow(im_rotated)
    plt.axis('off')
    fig.savefig(df.name.ix[i] + ".jpg")
If instead you want to show the pictures, you can do:
plt.show()
The resulting pictures are the birds and the butterfly.
As we don't know your csv-file, you have to tune your pd.read_csv() for your case.
Here I'm using requests to download some images in-memory.
These are then decoded with the help of scipy (which you should already have; if not, you can use Pillow too).
The decoded images are then raw numpy arrays and are shown by matplotlib.
Keep in mind that we are not using temporary files here and everything is held in memory. Read also this (answer by jfs).
For people missing some required libs, one should be able to do the same with (the code needs to be changed, of course):
requests can be replaced with urllib (standard lib); I'm not showing code, but this SO-question should be a good start, and another relevant SO-question talks about in-memory processing with urllib
pandas can be replaced by csv (standard lib)
scipy can be replaced by Pillow (although internal storage might differ then)
matplotlib is just for demo purposes (not sure if Pillow allows showing images; edit: it seems it can)
I just selected some random images from a German news page.
Edit: free images from Wikipedia are now used!
Code:
import requests                  # downloading images
import pandas as pd              # csv / data input
from scipy.misc import imread    # image decoding -> numpy array
import matplotlib.pyplot as plt  # only for demo / plotting

# Fake data -> pandas DataFrame
urls_df = pd.DataFrame({'urls': ['https://upload.wikimedia.org/wikipedia/commons/thumb/c/cb/Rescue_exercise_RCA_2012.jpg/500px-Rescue_exercise_RCA_2012.jpg',
                                 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/31/Clinotarsus_curtipes-Aralam-2016-10-29-001.jpg/300px-Clinotarsus_curtipes-Aralam-2016-10-29-001.jpg',
                                 'https://upload.wikimedia.org/wikipedia/commons/thumb/9/9f/US_Capitol_east_side.JPG/300px-US_Capitol_east_side.JPG']})

# Download & decode
imgs = []
for i in urls_df.urls:                # iterate over column / pandas Series
    r = requests.get(i, stream=True)  # see link for stream=True!
    r.raw.decode_content = True       # Content-Encoding
    imgs.append(imread(r.raw))        # decode to numpy array

# imgs: list of numpy arrays with varying shapes of form (x, y, 3),
#       as we got 3 color channels
# Beware: downloading PNGs might result in a shape of (x, y, 4),
#         as an alpha channel might be present
# For more options: https://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.imread.html

# Plot
f, arr = plt.subplots(len(imgs))
for i in range(len(imgs)):
    arr[i].imshow(imgs[i])
plt.show()
Output: [the three downloaded images, shown in stacked matplotlib subplots]
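As a present-day footnote, scipy.misc.imread has since been removed from SciPy; a small Python 3 sketch of the same download-and-decode idea using Pillow and numpy (same Wikipedia URL as above):
import io
import numpy as np
import requests
from PIL import Image

url = ('https://upload.wikimedia.org/wikipedia/commons/thumb/9/9f/'
       'US_Capitol_east_side.JPG/300px-US_Capitol_east_side.JPG')
img = Image.open(io.BytesIO(requests.get(url).content))  # decode in memory
arr = np.asarray(img)  # numpy array, shape (rows, cols, 3) for a JPEG
print(arr.shape, arr.dtype)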
