I am trying to predict values by loading a saved version of my model.
Here is the code for it:
def classifier(img, weights_file):
    # Load the model
    model = tf.lite.TFLiteConverter.from_keras_model(weights_file)
    # Create the array of the right shape to feed into the keras model
    data = np.ndarray(shape=(1, 200, 200, 3), dtype=np.float32)
    image = img
    # image sizing
    size = (200, 200)
    image = ImageOps.fit(image, size, Image.ANTIALIAS)
    # turn the image into a numpy array
    image_array = np.asarray(image)
    # Normalize the image
    normalized_image_array = image_array.astype(np.float32) / 255
    # Load the image into the array
    data[0] = normalized_image_array
    # run the inference
    prediction_percentage = model.predict(data)
    prediction = prediction_percentage.round()
    return prediction, prediction_percentage
My model throws the error "'TFLiteKerasModelConverterV2' object has no attribute 'predict'".
Can anyone please tell me what I can change here?
You are creating a TFLiteConverter object from your weights file, not loading a model. The correct way to load the model weights is with load_weights. Try:
model.load_weights(weights_file)
However, for that you would first need to define the model the same way as you did when training it. If you have saved your model in the SavedModel format, use
model = tf.keras.models.load_model(weights_file)
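Putting it together, here is a minimal sketch of the corrected function, assuming weights_file points to a model saved with model.save() (SavedModel or H5); the preprocessing is unchanged from the question:
import numpy as np
import tensorflow as tf
from PIL import Image, ImageOps

def classifier(img, weights_file):
    # Load the full saved model (architecture + weights); assumes model.save() was used
    model = tf.keras.models.load_model(weights_file)
    # Preprocess exactly as in the question: resize, then normalize to [0, 1]
    image = ImageOps.fit(img, (200, 200), Image.ANTIALIAS)
    data = np.asarray(image, dtype=np.float32)[None, ...] / 255
    # predict() now exists, because model is a tf.keras Model, not a converter
    prediction_percentage = model.predict(data)
    return prediction_percentage.round(), prediction_percentage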
I am trying to run a pre-trained ONNX model (trained in a third-party labeling tool) for image recognition. The model was trained on some pre-defined labels in the tool. The aim now is to be able to run this model outside the tool. To do this, I am taking a sample image and running it through the model to get the identified labels as output. While doing so, I hit an impediment regarding how to adjust the inputs. The model needs inputs as follows:
How can I adjust my inputs in the following code?
import cv2
import numpy as np
import onnxruntime
import pytesseract
import PyPDF2

# Load the image
image = cv2.imread("example.jpg")
# Check if the image has been loaded successfully
if image is None:
    raise ValueError("Failed to load the image")
# Get the shape of the image
height, width = image.shape[:2]
# Make sure the height and width are positive
if height <= 0 or width <= 0:
    raise ValueError("Invalid image size")
# Set the desired size of the resized image
dsize = (640, 640)
# Resize the image using cv2.resize
resized_image = cv2.resize(image, dsize)
# Display the resized image
cv2.imshow("Resized Image", resized_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Load the ONNX model
session = onnxruntime.InferenceSession("ic/model.onnx")
# Check if the model has been loaded successfully
if session is None:
    raise ValueError("Failed to load the model")
# Get the input names and shapes of the model
inputs = session.get_inputs()
for i, input_info in enumerate(inputs):
    print(f"Input {i}: name = {input_info.name}, shape = {input_info.shape}")
# Run the ONNX model
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
prediction = session.run([output_name], {input_name: image})[0]
# Postprocess the prediction to obtain the labels
labels = postprocess(prediction)
# Use PyTesseract to extract the text from the image
text = pytesseract.image_to_string(image)
# Print the labels and the text
print("Labels:", labels)
print("Text:", text)
I need to adjust them because the code throws the following error:
ValueError: Model requires 4 inputs. Input Feed contains 1
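Since the error says the model declares four inputs while the feed contains only one, here is a minimal sketch of the adjustment, under the assumption that the first input takes the image as a float32 NCHW batch; the remaining three inputs are model-specific (they depend on how the tool exported the model) and are left as labeled placeholders:
# Build the image blob: (1, 3, 640, 640), float32, channels-first (assumed layout)
blob = resized_image.astype(np.float32).transpose(2, 0, 1)[None, ...]

# Inspect every declared input, then feed all of them by name
for input_info in session.get_inputs():
    print(input_info.name, input_info.shape, input_info.type)

input_feed = {session.get_inputs()[0].name: blob}
# input_feed["..."] = ...  # the other three inputs must be filled in from the tool's export

prediction = session.run([output_name], input_feed)[0]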
I tried to execute the ViT model from "Image Classification with Hugging Face Transformers and Keras" and got an error, specifically at this instruction:
processed_dataset = ds.map(augmentation, batched=True)
The error:
ValueError: Exception encountered when calling layer "resizing_8" (type Resizing).
Attempt to convert a value (<PIL.BmpImagePlugin.BmpImageFile image mode=L size=190x100 at 0x7F35C52AD210>) with an unsupported type (<class 'PIL.BmpImagePlugin.BmpImageFile'>) to a Tensor.
Call arguments received:
  • inputs=<PIL.BmpImagePlugin.BmpImageFile image mode=L size=190x100 at 0x7F35C52AD210>
I tried the answer in this link: ArrowTypeError: Could not convert <PIL.PngImagePlugin.PngImageFile image mode=RGB size=32x32 at 0x7F2223B6ED10>, where I added 'img': Image(decode=True, id=None) to my features in create_image_folder_dataset(), and I still have the same problem, except for a small change in this part:
ValueError: Exception encountered when calling layer "resizing_13" (type Resizing).
What should I do to solve this problem?
The create_image_folder_dataset function:
def create_image_folder_dataset(root_path):
    """creates `Dataset` from image folder structure"""
    # get class names from folder names
    _CLASS_NAMES = os.listdir(root_path)
    # define `datasets` features
    features = datasets.Features({
        "img": datasets.Image(decode=True, id=None),
        #"img": datasets.Image(),
        "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
    })
    #print(_CLASS_NAMES)
    # temp lists holding datapoints for creation
    img_data_files = []
    label_data_files = []
    # load images into lists for creation
    for img_class in os.listdir(root_path):
        for img in os.listdir(os.path.join(root_path, img_class)):
            path_ = os.path.join(root_path, img_class, img)
            img_data_files.append(path_)
            label_data_files.append(img_class)
    # create dataset
    ds = datasets.Dataset.from_dict({"img": img_data_files, "label": label_data_files}, features=features)
    return ds
ds = create_image_folder_dataset("/content/drive/MyDrive/FINAL_DATASET")
ds[0]
# returns:
# {'img': <PIL.BmpImagePlugin.BmpImageFile image mode=L size=190x100 at 0x7F35C54ECC10>,
#  'label': 0}
My augmentation function:
from transformers import ViTFeatureExtractor
from tensorflow import keras
from tensorflow.keras import layers

model_id = "google/vit-base-patch16-224-in21k"
#google/vit-base-patch32-384
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    [
        layers.Resizing(feature_extractor.size, feature_extractor.size),
        layers.Rescaling(1. / 255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    ],
    name="data_augmentation",
)

# use keras image data augmentation processing
def augmentation(examples):
    print(examples["img"])
    examples["pixel_values"] = [data_augmentation(image) for image in examples["img"]]
    return examples

# basic processing (only resizing)
def process(examples):
    examples.update(feature_extractor(examples['img']))
    return examples

# we are also renaming our label col to labels to use `.to_tf_dataset` later
#ds = ds.rename_column("label", "labels")
Now it's working: I converted my dataset from "L" (grayscale) to "RGB".
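For reference, a minimal sketch of that conversion (an assumption about where it goes: per image, inside the augmentation step, using PIL's convert and handing the Keras layers a NumPy array rather than a raw PIL object):
import numpy as np

def augmentation(examples):
    # convert each PIL image from mode "L" to "RGB" before the Keras
    # preprocessing pipeline sees it, avoiding the unsupported-type error
    examples["pixel_values"] = [
        data_augmentation(np.array(image.convert("RGB")))
        for image in examples["img"]
    ]
    return examples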
I am building an image search using a one-shot model because I have very little data per class.
I am following this tutorial.
I have already prepared the data pipeline and trained the model. But I don't understand the single-image prediction process, which is generally done with model.predict.
I tried the following code, but I think I am missing something.
img1 = cv2.imread("./images_evaluation/test.jpg",cv2.IMREAD_GRAYSCALE)
img1 = cv2.resize(img1,(105,105))
img1 = np.expand_dims(cv2.resize(img1, (105,105)), axis=2)
(test_image_names, train_image_names) = generate_oneshot_validation_trials(dataset, 20)
train_images = get_images(train_image_names, IMAGE_SHAPE)
images = np.tile(img1, (len(train_images), 1, 1, 1))
preds = siamese_model1.predict([images, train_images])
pred_idx = np.argmax(preds, axis=0)[0]
pred_char_name = train_image_names[pred_idx].split('/')[-2]
print(pred_char_name)  # here, I get a different prediction on every try. What's the reason?
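For comparison, here is a minimal sketch of a fully deterministic single-pair prediction (my assumptions: the siamese model scores one (test, reference) pair at a time with the same (105, 105, 1) input shape; reference.jpg is a hypothetical image of a known class, loaded the same way as test.jpg):
import cv2
import numpy as np

def load_pair_image(path):
    # load, resize, and add batch + channel dims: (1, 105, 105, 1)
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    return cv2.resize(img, (105, 105)).astype(np.float32)[None, :, :, None]

test = load_pair_image("./images_evaluation/test.jpg")
ref = load_pair_image("./images_evaluation/reference.jpg")  # hypothetical known-class image
# one similarity score for this fixed pair; no random sampling of train images involved
similarity = siamese_model1.predict([test, ref])
print(similarity)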
I trained a model, and now I would like to use it to detect objects in images. Using the DefaultPredictor only the bounding boxes are returned; I would need the masks as well. I saw that you can also perform inference with this method:
model.eval()
with torch.no_grad():
    outputs = model(inputs)
I think that's what I should use. The problem is that I don't know how to set up the inputs, starting from the images.
import os
import glob
import cv2
import torch
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.structures import ImageList

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/"
                                              "mask_rcnn_R_101_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.SOLVER.IMS_PER_BATCH = 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class
cfg.INPUT.FORMAT = "BGR"
# Just run these lines if you have the trained model in memory
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set the testing threshold for this model
# build model
model = build_model(cfg)
DetectionCheckpointer(model).load("output/model_final.pth")
model.eval()  # make sure it's in eval mode

image = cv2.imread("/kaggle/working/detectron2/images/73-ab1.jpg")
height, width = image.shape[:2]
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
image = ImageList.from_tensors([image])

with torch.no_grad():
    inputs = image
    outputs = model(inputs)
Unfortunately, I think I'm doing something wrong. Can someone enlighten me?
See the Model Input Format documentation for the built-in models.
Basically, the model in your code is not expecting an ImageList object, but a list of dicts where each dict needs to provide specific information about one image, as explained in the documentation linked above.
So, your inference code needs to be corrected to the following.
image = cv2.imread("/kaggle/working/detectron2/images/73-ab1.jpg")
height, width = image.shape[:2]
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
inputs = [{"image": image, "height": height, "width": width}]
with torch.no_grad():
    outputs = model(inputs)
You can also see this in the code - the forward method of the GeneralizedRCNN class.
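Since the original goal was the masks, here is a short follow-up sketch (assuming the standard output format of detectron2's GeneralizedRCNN, where each result dict carries an Instances object):
# each dict in outputs corresponds to one input image
instances = outputs[0]["instances"]
masks = instances.pred_masks   # (N, H, W) boolean tensor, one binary mask per detection
boxes = instances.pred_boxes   # matching bounding boxes
scores = instances.scores      # confidence score per detection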
I am new to OpenCV and TensorFlow. I am trying to get a live camera preview and use the live camera feed for a TensorFlow prediction. Here is the part of the code for the live preview and prediction:
image = np.zeros((64, 64, 3))
softmax_pred = tf.nn.softmax(conv_net(x, weights, biases, image_size, 1.0))
cam = cv2.VideoCapture(0)
while True:
    ret_val, img = cam.read()
    img = cv2.flip(img, 1)
    cv2.imshow('my webcam', img)
    img = img.resize((64, 64))
    image = array(img).reshape(1, 64, 64, 3)
    image.astype(float)
    result = sess.run(softmax_pred, feed_dict={x: image})
I am not sure what's wrong here. I am getting this error:
image = array(img).reshape(1,64,64,3)
ValueError: total size of new array must be unchanged
My Tensor placeholder for the image has the shape '(?, 64, 64, 3)'. I did the same for a JPEG image by manually loading it from disk and reshaping it to (1, 64, 64, 3), and it works fine. Here is the code for manually loading an image and then predicting:
img = Image.open('/home/pragyan/Documents/miniProject/PredictImages/IMG_4804.JPG')
img = img.resize((64, 64))
image = array(img).reshape(1,64,64,3)
image.astype(float)
result = sess.run(softmax_pred, feed_dict={x: image})
The above code works, but reshaping a live frame from the webcam gives me this error (ValueError: total size of new array must be unchanged). Is there a way to fix this? I am not able to understand how to fix it.
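For reference, a minimal sketch of a likely fix (my reading of the error: img returned by cam.read() is a NumPy array, so img.resize((64, 64)) is NumPy's resize, not PIL's Image.resize, and the result no longer contains the 1*64*64*3 elements the reshape expects). Resizing the frame with cv2.resize keeps all three channels:
ret_val, img = cam.read()
img = cv2.flip(img, 1)
cv2.imshow('my webcam', img)
# cv2.resize scales the NumPy frame to (64, 64, 3), so the reshape sizes match
small = cv2.resize(img, (64, 64))
image = small.reshape(1, 64, 64, 3).astype(float)
result = sess.run(softmax_pred, feed_dict={x: image})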