I'm currently documenting how to convert each data type so that it is compatible with a new deep learning framework. I'll cut out the redundant code in the future :)
The following code can be executed in the VS Code interactive window.
The code has two parts:
convert mat file to npy file (.mat -> .npy)
convert npy file to nifti file (.npy -> .nii.gz) and put a specific name in the path so that it matches the nnU-Net data format. See the nnU-Net dataset_conversion.md if you're interested in it.
How does it actually work?
1)10000001.mat -> 10000001.npy
2)10000001.npy -> AORTA_001_0000.nii.gz
The paths can be adjusted by each individual user.
#%%
import numpy as np
import nibabel as nb
import pathlib
import numpy as np
from torch.utils.data import Dataset
import scipy.io
# Step 1: convert every MATLAB volume (.mat) into a NumPy file (.npy).
root_data = '/mnt/intern/code/dataset/test/original'
root_label = '/mnt/intern/code/dataset/test/label'

data_files = list(pathlib.Path(root_data).iterdir())
label_files = list(pathlib.Path(root_label).iterdir())

# Sort both listings so that the i-th image is paired with the i-th label.
examples = sorted(data_files)
examples2 = sorted(label_files)

# Pairing is purely positional, so a different number of files would either
# raise IndexError half-way through or silently pair the wrong volumes.
if len(examples) != len(examples2):
    raise ValueError(
        'Found %d data files but %d label files; cannot pair them by position.'
        % (len(examples), len(examples2))
    )

for fname, fname2 in zip(examples, examples2):
    # loadmat() returns a dict keyed by the variable names stored in the file.
    data = scipy.io.loadmat(str(fname))['data']
    label = scipy.io.loadmat(str(fname2))['label']  # e.g. (512, 512, 251)

    # Case id = last 8 characters of the image file name before its first dot.
    # It is taken from the file name only (not the whole path) so that a dot
    # in a directory name cannot truncate it. The label is deliberately saved
    # under the image's id so both outputs share one name.
    case_id = fname.name.split('.')[0][-8:]

    # np.save() appends the '.npy' extension itself.
    np.save('/mnt/intern/mat2npy/original/' + case_id, data)
    np.save('/mnt/intern/mat2npy/label/' + case_id, label)
#%%
# Name change to match with nnU-Net data format
import numpy as np
import nibabel as nb
import pathlib
import numpy as np
from torch.utils.data import Dataset
import scipy.io
import numpy as np
import nibabel as nib
# Step 2: convert every .npy volume into a NIfTI file named the nnU-Net way.
root_data = '/mnt/intern/mat2npy/imagesTr'
root_label = '/mnt/intern/mat2npy/labelsTr'

# Only pick up .npy inputs: the .nii.gz results are written into these same
# folders, so a second run would otherwise try to np.load() them.
data_files = [p for p in pathlib.Path(root_data).iterdir() if p.suffix == '.npy']
label_files = [p for p in pathlib.Path(root_label).iterdir() if p.suffix == '.npy']

# Sort both listings so that the i-th image is paired with the i-th label.
examples = sorted(data_files)
examples2 = sorted(label_files)

# Pairing is purely positional, so the two folders must hold the same number
# of volumes.
if len(examples) != len(examples2):
    raise ValueError(
        'Found %d data files but %d label files; cannot pair them by position.'
        % (len(examples), len(examples2))
    )

# Identity affine: the .npy files carry no spacing/orientation information.
affine = np.eye(4)

for fname, fname2 in zip(examples, examples2):
    # Replace np.load(...) with your own arrays if they come from elsewhere.
    data = np.array(np.load(str(fname)), dtype=np.float32)
    # NOTE(review): nnU-Net expects integer label maps; float32 is kept from
    # the original script - confirm the label value range before changing it.
    label = np.array(np.load(str(fname2)), dtype=np.float32)  # e.g. (512, 512, 251)

    nifti_data = nib.Nifti1Image(data, affine)
    nifti_label = nib.Nifti1Image(label, affine)

    # Case id = 'AORTA_' + last 3 characters of the image file name before its
    # first dot (10000001.npy -> AORTA_001). Taken from the file name only so
    # that a dot in a directory name cannot truncate it.
    case_id = 'AORTA_' + fname.name.split('.')[0][-3:]

    # Put the path + the extension ('.nii' or '.nii.gz') here.
    # Images carry the 4-digit channel suffix (_0000); label files must NOT
    # carry it, otherwise nnU-Net cannot match a label to its image.
    nib.save(nifti_data, '/mnt/intern/mat2npy/imagesTr/' + case_id + '_0000.nii.gz')
    nib.save(nifti_label, '/mnt/intern/mat2npy/labelsTr/' + case_id + '.nii.gz')
I am trying to add data that I am reading from a series of JSON files to a NumPy array (or whatever data collection would work best). My idea is to sort a collection of episodes of a TV show by episode title.
The problem I have encountered is actually creating the collection from the data.
The intent is to have a collection of the items found within the for loop, [a, b, c, d], for each episode of the show.
Is a Numpy array the best way to go about making this collection, or should I use something else?
# Raw string: '\s' is not a valid escape sequence, so the plain literal only
# works by accident and triggers a warning on current Python versions.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform default.
season1 = open(r'THEJSONFILES\seasonone.json', 'r', encoding='utf-8')
# Column names of the collection that is supposed to be filled per episode.
season_array = np.array(['episodeTitle', 'seasonNum', 'episodeNum', 'plotContents'])
def ReadTheDarnJsonFile(jsonTitle):
    """Print every episode of one season file and return the collected rows.

    Args:
        jsonTitle: Open, readable text file whose JSON content is an object
            with an ``episodes`` list of objects.

    Returns:
        A list with one ``[title, seasonNumber, episodeNumber, plot]`` list
        per episode. Every field is converted with ``str()``, so a missing
        key shows up as the string ``'None'``.
    """
    import json as j  # local import: this snippet never imports json itself

    seasonobj = j.loads(jsonTitle.read())

    rows = []
    # Iterate over the entries directly; the name ``list`` is avoided because
    # it would shadow the built-in type.
    for entry in seasonobj['episodes']:
        a = str(entry.get('title'))
        b = str(entry.get('seasonNumber'))
        c = str(entry.get('episodeNumber'))
        d = str(entry.get('plot'))
        print(a, b, c, d)
        print("----------------")
        # np.append(season_array, [a, b, c, d]) is not correct here: it
        # returns a new array and leaves season_array untouched. A plain list
        # of rows is collected instead and handed back to the caller.
        rows.append([a, b, c, d])
    return rows
# Read the season, then release the file handle even if parsing fails.
try:
    ReadTheDarnJsonFile(season1)
finally:
    season1.close()
print(season_array)
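Two notes. First, I would avoid using list as a variable name because it shadows Python's built-in list type (it is not a keyword, so Python will not stop you, but the built-in becomes unreachable in that scope). Second, I would recommend using a custom class for your data for maximum readability.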
# Raw string: '\s' is not a valid escape sequence, so the plain literal only
# works by accident and triggers a warning on current Python versions.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform default.
season1 = open(r'THEJSONFILES\seasonone.json', 'r', encoding='utf-8')
# Column names of the per-episode collection.
season_array = np.array(['episodeTitle', 'seasonNum', 'episodeNum', 'plotContents'])
class episode:
    """One episode of the show: title, season/episode number and plot."""

    def __init__(self, title, seasonNumber, episodeNumber, plot):
        """Store the four fields unchanged on the instance."""
        self.title = title
        self.seasonNumber = seasonNumber
        self.episodeNumber = episodeNumber
        self.plot = plot

    def summary(self):
        """Print a three-line summary: season/episode header, title, plot."""
        print(f"Season {self.seasonNumber} Episode {self.episodeNumber}")
        print(self.title)
        print(self.plot)
def ReadTheDarnJsonFile(jsonTitle):
    """Parse one season file into a list of ``episode`` objects.

    Args:
        jsonTitle: Open, readable text file whose JSON content is an object
            with an ``episodes`` list of objects.

    Returns:
        A list with one ``episode`` per entry. Every field is converted with
        ``str()``, so a missing key shows up as the string ``'None'``.
    """
    import json as j  # local import: this snippet never imports json itself

    seasonobj = j.loads(jsonTitle.read())
    episodes = seasonobj['episodes']

    season_array = []
    # Read from ``episodes`` (the parsed entries), not from the built-in
    # ``list`` type, which has no per-episode data.
    for entry in episodes:
        a = str(entry.get('title'))
        b = str(entry.get('seasonNumber'))
        c = str(entry.get('episodeNumber'))
        d = str(entry.get('plot'))
        season_array.append(episode(a, b, c, d))
    return season_array
# Build the episode list for season one and print a summary of each entry.
season_array = ReadTheDarnJsonFile(season1)
for item in season_array:
    item.summary()
Here is what I ended up doing.
import json as j
import pandas as pd
# One DataFrame per season is appended to this list by ReadTheDarnJsonFile().
emptyArray = []

# Raw strings: '\s' is not a valid escape sequence, so the plain literals only
# work by accident and trigger a warning on current Python versions.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform default.
season1 = open(r'THEJSONFILES\seasonone.json', 'r', encoding='utf-8')
season2 = open(r'THEJSONFILES\seasontwo.json', 'r', encoding='utf-8')
season3 = open(r'THEJSONFILES\seasonthree.json', 'r', encoding='utf-8')
season4 = open(r'THEJSONFILES\seasonfour.json', 'r', encoding='utf-8')
season5 = open(r'THEJSONFILES\seasonfive.json', 'r', encoding='utf-8')
season6 = open(r'THEJSONFILES\seasonsix.json', 'r', encoding='utf-8')
season7 = open(r'THEJSONFILES\seasonseven.json', 'r', encoding='utf-8')

# Column order of every per-season DataFrame.
columnData = ["episodeTitle", "seasonIndex", "episodeIndex", "plot", "imageURL"]

# Placeholder until the seasons are concatenated. An (empty) instance is
# created here; the bare name pd.DataFrame would only bind the class itself.
finalDf = pd.DataFrame()
def ReadTheDarnJsonFile(jsonTitle):
    """Parse one season file and append its episode table to ``emptyArray``.

    Relies on the module-level names ``columnData`` (column labels) and
    ``emptyArray`` (list collecting one DataFrame per season).

    Args:
        jsonTitle: Open, readable text file whose JSON content is an object
            with an ``episodes`` list of objects.

    Returns:
        None. The season's DataFrame is appended to ``emptyArray``. Every
        field is converted with ``str()``, so a missing key shows up as the
        string ``'None'``.
    """
    seasonObj = j.loads(jsonTitle.read())

    # Collect all rows first and build the frame once; growing a DataFrame
    # one ``df.loc[i] = ...`` at a time re-allocates it for every row.
    rows = [
        [
            str(entry.get('title')),
            str(entry.get('seasonNumber')),
            str(entry.get('episodeNumber')),
            str(entry.get('plot')),
            str(entry.get('image')),
        ]
        for entry in seasonObj['episodes']
    ]
    df = pd.DataFrame(rows, columns=columnData)
    emptyArray.append(df)
# Read every season in order, then release all file handles even if one of
# the files fails to parse.
season_files = (season1, season2, season3, season4, season5, season6, season7)
try:
    for season_file in season_files:
        ReadTheDarnJsonFile(season_file)
finally:
    for season_file in season_files:
        season_file.close()

# Stack the per-season tables into one frame.
finalDf = pd.concat(emptyArray)
print(emptyArray)

# Sort all episodes by title. reset_index() keeps the previous per-season row
# number as an extra "index" column in the output.
holyOutput = finalDf.sort_values(by=['episodeTitle'])
holyOutput.reset_index(inplace=True)

# Raw string: '\J' and '\O' are not valid escape sequences; the resulting
# path is exactly the same as before.
holyOutput.to_json(r"P:\ProjectForStarWarsCloneWarsJson\JSON\OutputJsonV2.json")
I am trying to reproduce this R code in python using rpy2:
# Load gamlss and its companion packages.
library(gamlss)
library(gamlss.dist)
library(gamlss.add)
# Sample to be fitted: 76 numeric observations.
x <- c(37.50,46.79,48.30,46.04,43.40,39.25,38.49,49.51,40.38,36.98,40.00,
38.49,37.74,47.92,44.53,44.91,44.91,40.00,41.51,47.92,36.98,43.40,
42.26,41.89,38.87,43.02,39.25,40.38,42.64,36.98,44.15,44.91,43.40,
49.81,38.87,40.00,52.45,53.13,47.92,52.45,44.91,29.54,27.13,35.60,
45.34,43.37,54.15,42.77,42.88,44.26,27.14,39.31,24.80,16.62,30.30,
36.39,28.60,28.53,35.84,31.10,34.55,52.65,48.81,43.42,52.49,38.00,
38.65,34.54,37.70,38.11,43.05,29.95,32.48,24.63,35.33,41.34)
# Fit x with fitDist() and keep the result in `fit`.
# NOTE(review): k is presumably the GAIC penalty and type = "realplus" the
# family of positive-real distributions - confirm against ?fitDist.
fit <- fitDist(x, k = 2, type = "realplus", trace = FALSE, try.gamlss = TRUE)
# Print the summary of the fitted object.
summary(fit)
My attempt is:
import numpy as np
from rpy2.robjects import numpy2ri
from rpy2.robjects.packages import importr, isinstalled

# Let rpy2 convert NumPy arrays to R vectors automatically.
numpy2ri.activate()

utils = importr('utils')
# Install gamlss only when it is missing instead of on every run.
if not isinstalled('gamlss'):
    utils.install_packages('gamlss')
gamlss = importr('gamlss')
base = importr('base')

# Same 76 observations as the R vector (the first value is 37.50, not 7.50).
x = np.array([37.50,46.79,48.30,46.04,43.40,39.25,38.49,49.51,40.38,36.98,40.00,
              38.49,37.74,47.92,44.53,44.91,44.91,40.00,41.51,47.92,36.98,43.40,
              42.26,41.89,38.87,43.02,39.25,40.38,42.64,36.98,44.15,44.91,43.40,
              49.81,38.87,40.00,52.45,53.13,47.92,52.45,44.91,29.54,27.13,35.60,
              45.34,43.37,54.15,42.77,42.88,44.26,27.14,39.31,24.80,16.62,30.30,
              36.39,28.60,28.53,35.84,31.10,34.55,52.65,48.81,43.42,52.49,38.00,
              38.65,34.54,37.70,38.11,43.05,29.95,32.48,24.63,35.33,41.34])

# fitDist() is provided by the gamlss package, not by base.
# R's TRUE/FALSE are Python's True/False, and the R argument `try.gamlss` is
# written `try_gamlss`: a dot is not allowed in a Python keyword argument, and
# importr() maps the underscore back to the dot when calling R.
fit = gamlss.fitDist(x, k=2, type="realplus", trace=False, try_gamlss=True)
# The counterpart of R's summary(fit) would be base.summary(fit).
However, this is a syntax error because of try.gamlss = TRUE.
How should I do this?
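Replace the dot in try.gamlss with an underscore (try_gamlss): a dot is not allowed in a Python keyword argument name, and rpy2 translates the underscore back to the dot when it calls the R function.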
The documentation has explanations: https://rpy2.github.io/doc/v3.3.x/html/robjects_rpackages.html
My first post here.
So I'm loading data into a variable called f1_data, then passing it to the pm.removeDC() function to do some signal processing, and keeping the result in the same variable. But then I want to replace only column 8 with the original data, which I saved as raw_data, and I can't figure out why it doesn't work. Here are the functions. Can anyone help?
inside file pm.py
def removeDC(data, sample_rate_hz=None):
    """Return a copy of *data* with columns 1-8 high-pass filtered at 1 Hz.

    The input array is left untouched. Filtering in place would also change
    every other array that shares memory with *data* (for example a slice
    such as ``data[:]``).

    Args:
        data: 2-D NumPy array with at least 9 columns; column 0 is passed
            through unchanged.
        sample_rate_hz: Sampling rate used to normalise the cutoff. Defaults
            to the module-level ``fs_Hz`` (expected to be defined elsewhere
            in this module).

    Returns:
        A new array of the same shape and dtype as *data*.
    """
    if sample_rate_hz is None:
        sample_rate_hz = fs_Hz

    # define the filter: 2nd-order Butterworth high-pass, cutoff expressed as
    # a fraction of the Nyquist frequency
    butter_order = 2
    hp_cutoff_Hz = 1.0
    b, a = signal.butter(butter_order, hp_cutoff_Hz / (sample_rate_hz / 2.0), 'highpass')

    filtered = data.copy()
    for i in range(1, 9):
        # filter each column along the sample axis (axis 0)
        filtered[:, i] = signal.lfilter(b, a, data[:, i], 0)
    return filtered
def get_epoch1(data, t_sec, epoch, f_tup, col):
    """Cut one epoch out of *data*, plot one column of it and return the cut.

    Args:
        data: Array indexed by sample along its first axis and by column
            along its second.
        t_sec: Array with one time stamp per sample (presumably seconds).
        epoch: Sequence whose first item holds the (start, end) bounds of
            the epoch; both bounds are inclusive.
        f_tup: Tuple laid out as (f_wdir, f_name, f_columns, out_save,
            out_dir, out_number, fig_width). Only fig_width (index 6) is
            used here.
        col: Index of the column to plot.

    Returns:
        ``(epoch_boolvector, epoch_timescale, epoch_data)``: the boolean
        sample mask, the time stamps inside the epoch, and the matching rows
        of *data*.
    """
    fig_width = f_tup[6]

    # Select the samples whose time stamp lies inside the epoch.
    start, end = epoch[0][0], epoch[0][1]
    epoch_boolvector = (t_sec >= start) & (t_sec <= end)
    epoch_timescale = t_sec[epoch_boolvector]
    epoch_data = data[epoch_boolvector]

    # Plot the requested column over the epoch's own time range.
    plt.figure(figsize=(fig_width, 8), dpi=96)
    plt.plot(epoch_timescale, epoch_data[:, col])
    plt.xlim(epoch_timescale[0], epoch_timescale[-1])
    plt.show()
    return (epoch_boolvector, epoch_timescale, epoch_data)
inside main file
#load the whole data
(f1_data, f1_data_indices, f1_timescale) = pm.load_data(f1_wdir, f1_name)
# Keep an independent copy of the unfiltered data. Slicing a NumPy array
# (f1_data[:]) only creates a view that shares memory with f1_data, so any
# in-place change to f1_data would also show up in raw_data.
raw_data = f1_data.copy()
(f1ep1_boolvector, f1ep1_timescale, f1ep1_data) = pm.get_epoch1(f1_data, f1_timescale, f1_epochs[1], f1_tup, 8)
#--- filter data to remove DC (1Hz)
f1_data = pm.removeDC(f1_data)
# replace only channel 8 with original data
f1_data[:, 8] = raw_data[:, 8]
(f1ep2_boolvector, f1ep2_timescale, f1ep2_data) = pm.get_epoch1(f1_data, f1_timescale, f1_epochs[1], f1_tup, 8)
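The solution is to import copy and use the copy.deepcopy function. Slicing a NumPy array with [:] does not copy it; it returns a view onto the same memory, so the in-place filtering also changed raw_data. (For a NumPy array, f1_data.copy() achieves the same thing.)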
For further info check this link:
docs.python.org/2/library/copy.html
When I have raw_data = f1_data[:] I get, after pm.removeDC():
raw_data is f1_data: False
(raw_data == f1_data).all(): True
But when I have raw_data = copy.deepcopy(f1_data) I get, after pm.removeDC():
raw_data is f1_data: False
(raw_data == f1_data).all(): False