Azure ML output from pipeline - python
I am trying to build a pipeline in Microsoft Azure ML that (for now) runs a single Python script.
The problem is that I cannot find my output.
In my Notebooks section I have created the following two notebooks:
1) script called "test.ipynb"
# azureml-core of version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset, Datastore
import pandas as pd
import numpy as np
import datetime
import math
#Upload datasets
subscription_id = 'myid'
resource_group = 'myrg'
workspace_name = 'mywn'
workspace = Workspace(subscription_id, resource_group, workspace_name)
dataset_zre = Dataset.get_by_name(workspace, name='file1')
dataset_SLA = Dataset.get_by_name(workspace, name='file2')
df_zre = dataset_zre.to_pandas_dataframe()
df_SLA = dataset_SLA.to_pandas_dataframe()
result = pd.concat([df_SLA,df_zre], sort=True)
result.to_csv(path_or_buf="/mnt/azmnt/code/Users/aniello.spiezia/outputs/output.csv",index=False)
def_data_store = workspace.get_default_datastore()
def_data_store.upload(src_dir = '/mnt/azmnt/code/Users/aniello.spiezia/outputs', target_path = '/mnt/azmnt/code/Users/aniello.spiezia/outputs', overwrite = True)
print("\nFinished!")
#End of the file
2) pipeline code called "pipeline.ipynb"
import os
import pandas as pd
import json
import azureml.core
from azureml.core import Workspace, Run, Experiment, Datastore
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import CondaDependencies, RunConfiguration
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.telemetry import set_diagnostics_collection
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline, PipelineData, StepSequence
print("SDK Version:", azureml.core.VERSION)
###############################
ws = Workspace.from_config()
print('Workspace name: ' + ws.name,
'Subscription id: ' + ws.subscription_id,
'Resource group: ' + ws.resource_group, sep = '\n')
experiment_name = 'aml-pipeline-cicd' # choose a name for experiment
project_folder = '.' # project folder
experiment = Experiment(ws, experiment_name)
print("Location:", ws.location)
set_diagnostics_collection(send_diagnostics=True)
###############################
cd = CondaDependencies.create(pip_packages=["azureml-sdk==1.0.17", "azureml-train-automl==1.0.17", "pyculiarity", "pytictoc", "cryptography==2.5", "pandas"])
amlcompute_run_config = RunConfiguration(framework = "python", conda_dependencies = cd)
amlcompute_run_config.environment.docker.enabled = False
amlcompute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
amlcompute_run_config.environment.spark.precache_packages = False
###############################
aml_compute_target = "aml-compute"
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except:
    print("creating new compute target")
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                idle_seconds_before_scaledown=1800,
                                                                min_nodes = 0,
                                                                max_nodes = 4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
print("Azure Machine Learning Compute attached")
###############################
def_data_store = ws.get_default_datastore()
def_blob_store = Datastore(ws, "workspaceblobstore")
print("Blobstore's name: {}".format(def_blob_store.name))
# Naming the intermediate data as anomaly data and assigning it to a variable
output_data = PipelineData("output_data", datastore = def_blob_store)
print("output_data object created")
step = PythonScriptStep(name = "test",
script_name = "test.ipynb",
compute_target = aml_compute,
source_directory = project_folder,
allow_reuse = True,
runconfig = amlcompute_run_config)
print("Step created.")
###############################
steps = [step]
print("Step lists created")
pipeline = Pipeline(workspace = ws, steps = steps)
print ("Pipeline is built")
pipeline.validate()
print("Pipeline validation complete")
pipeline_run = experiment.submit(pipeline)
print("Pipeline is submitted for execution")
pipeline_run.wait_for_completion(show_output = False)
print("Pipeline run completed")
###############################
def_data_store.download(target_path = '.',
prefix = 'outputs',
show_progress = True,
overwrite = True)
model_fname = 'output.csv'
model_path = os.path.join("outputs", model_fname)
pipeline_run.upload_file(name = model_path, path_or_stream = model_path)
print('Uploaded the model {} to experiment {}'.format(model_fname, pipeline_run.experiment.name))
And this gives me the following error:
Pipeline run completed
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-22-a8a523969bb3> in <module>
111
112 # Upload the model file explicitly into artifacts (for CI/CD)
--> 113 pipeline_run.upload_file(name = model_path, path_or_stream = model_path)
114 print('Uploaded the model {} to experiment {}'.format(model_fname, pipeline_run.experiment.name))
115
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/run.py in wrapped(self, *args, **kwargs)
47 "therefore, the {} cannot upload files, or log file backed metrics.".format(
48 self, self.__class__.__name__))
---> 49 return func(self, *args, **kwargs)
50 return wrapped
51
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/run.py in upload_file(self, name, path_or_stream)
1749 :rtype: azure.storage.blob.models.ResourceProperties
1750 """
-> 1751 return self._client.artifacts.upload_artifact(path_or_stream, RUN_ORIGIN, self._container, name)
1752
1753 #_check_for_data_container_id
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_restclient/artifacts_client.py in upload_artifact(self, artifact, *args, **kwargs)
108 if isinstance(artifact, str):
109 self._logger.debug("Uploading path artifact")
--> 110 return self.upload_artifact_from_path(artifact, *args, **kwargs)
111 elif isinstance(artifact, IOBase):
112 self._logger.debug("Uploading io artifact")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_restclient/artifacts_client.py in upload_artifact_from_path(self, path, *args, **kwargs)
100 path = os.path.normpath(path)
101 path = os.path.abspath(path)
--> 102 with open(path, "rb") as stream:
103 return self.upload_artifact_from_stream(stream, *args, **kwargs)
104
FileNotFoundError: [Errno 2] No such file or directory: '/mnt/azmnt/code/Users/aniello.spiezia/outputs/output.csv'
Do you know what the problem could be?
In particular, I am interested in saving the output file called "output.csv" somewhere.
The best way for you to do this depends a bit on how you want to process the output.csv file after the run has completed. But in general, you can just write your CSV to the ./outputs folder:
# azureml-core of version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset, Datastore
import pandas as pd
import numpy as np
import datetime
import math
import os  # needed for the outputs-folder check below
#Upload datasets
subscription_id = 'myid'
resource_group = 'myrg'
workspace_name = 'mywn'
workspace = Workspace(subscription_id, resource_group, workspace_name)
dataset_zre = Dataset.get_by_name(workspace, name='file1')
dataset_SLA = Dataset.get_by_name(workspace, name='file2')
df_zre = dataset_zre.to_pandas_dataframe()
df_SLA = dataset_SLA.to_pandas_dataframe()
result = pd.concat([df_SLA,df_zre], sort=True)
if not os.path.isdir('outputs'):
    os.mkdir('outputs')
result.to_csv('outputs/output.csv', index=False)
print("\nFinished!")
#End of the file
After the run has completed, AzureML will upload the contents of the outputs directory to the run history, so there is no need to call datastore.upload().
Afterwards, you can see the file at http://ml.azure.com when you navigate to the run's outputs, just like the model.pt file in my run.
See here for some information on the ./outputs and ./logs folders: https://learn.microsoft.com/en-us/azure/machine-learning/how-to-save-write-experiment-files#where-to-write-files
If you actually want to create another DataSet as a result of your Run, please see this post here: Azure Machine Learning Service - dataset API question
In Daniel's example above, you would need to download the output from the run rather than from the datastore in your pipeline.ipynb code. Instead of calling def_data_store.download(), you would download the file from the run, for example with Run.download_file().
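A minimal sketch of that download, assuming the step is named "test" as in pipeline.ipynb and that the script wrote the CSV to its ./outputs folder (not verified against your workspace):

# the file uploaded from ./outputs is attached to the step's child run, so fetch that run first
step_run = pipeline_run.find_step_run("test")[0]
print(step_run.get_file_names())  # should list 'outputs/output.csv'
step_run.download_file(name="outputs/output.csv", output_file_path="output.csv")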
Another option is to output your data using PipelineData. PipelineData represents a named piece of output of a pipeline step, and is useful if you want to connect multiple steps together with inputs and outputs. With PipelineData, you would need to pass the PipelineData object into PythonScriptStep when you declare your step (as part of arguments=[] and outputs=[]), and then have your script read the output path from the command-line arguments.
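As a rough sketch of the step declaration (the script name step_script.py and the argument name --output_path are made up for illustration; the other objects are reused from pipeline.ipynb):

output_data = PipelineData("output_data", datastore=def_blob_store)
step = PythonScriptStep(name="test",
                        script_name="step_script.py",             # hypothetical .py script run by the step
                        arguments=["--output_path", output_data],  # resolved to a mount path at run time
                        outputs=[output_data],                     # declares the step's output
                        compute_target=aml_compute,
                        source_directory=project_folder,
                        runconfig=amlcompute_run_config)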
This notebook has examples of using PipelineData within a pipeline and downloading the outputs: https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb
And this blog post has details about how to handle this within your script (parsing the command-line arguments, creating the output directory, and writing the output file): https://blog.x5ff.xyz/blog/ai-azureml-python-data-pipelines/
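For instance, a minimal sketch of the script side (using the same hypothetical --output_path argument; the DataFrame here is only a placeholder for the real concatenated result from test.ipynb):

import argparse
import os
import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument("--output_path", type=str, help="mount path injected by PipelineData")
args = parser.parse_args()

result = pd.DataFrame({"example": [1, 2]})  # placeholder for the concatenated DataFrame

os.makedirs(args.output_path, exist_ok=True)  # the mount directory may not exist yet
result.to_csv(os.path.join(args.output_path, "output.csv"), index=False)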