I am currently trying to get the Caltech camera traps benchmark dataset into TFRecords but I am struggling quite a bit. https://lila.science/datasets/caltech-camera-traps. The annotations are displayed as follows:
"info": {"contributor": "Sara Beery", "date_created": "2018-07-03 18:34:36.573636", "version": "Caltech Camera Traps - ECCV18", "description": "Database of camera trap images collected from the NPS and the USGS with help from Justin Brown and Erin Boydston", "year": 2018}]
"categories": [{"id": 6, "name": "bobcat"}, ....]
"images": [{"file_name": "59b93afb-23d2-11e8-a6a3-ec086b02610b.jpg", "rights_holder": "Justin Brown", "height": 1494, "width": 2048, "frame_num": 2, "date_captured": "2012-05-09 07:33:45", "location": 38, "seq_num_frames": 3, "seq_id": "6f04895c-5567-11e8-a3d6-dca9047ef277", "id": "59b93afb-23d2-11e8-a6a3-ec086b02610b"},...]
"annotations": [{"image_id": "59ffbd00-23d2-11e8-a6a3-ec086b02610b", "category_id": 1, "bbox": [1118.72, 570.88, 328.96000000000004, 180.48000000000002], "id": "36132"}
I am trying to adapt the create_coco_tf_record.py script. My annotations do not contain the 'iscrowd' or 'segmentation' fields, and many of the images have no bounding boxes. Has anyone done something similar who would be able to help, please? Thanks! Here is the file...
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import logging
import os
import contextlib2
import numpy as np
import PIL.Image
from pycocotools import mask
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
'include_masks', False, 'Whether to include instance segmentations masks '
'(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '',
'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
# DensePose is only available for coco 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to only produce images/annotations on person class (for keypoint /
# densepose task).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
logger = tf.get_logger()
b'nose', b'left_eye', b'right_eye', b'left_ear', b'right_ear',
b'left_shoulder', b'right_shoulder', b'left_elbow', b'right_elbow',
b'left_wrist', b'right_wrist', b'left_hip', b'right_hip',
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
def clip_to_unit(x):
    """Clamp ``x`` to the closed interval [0.0, 1.0].

    Values below the interval map to 0.0, values above it map to 1.0, and
    anything already inside the interval is returned unchanged.
    """
    if x < 0.0:
        return 0.0
    if x > 1.0:
        return 1.0
    return x
def create_tf_example(image,
"""Converts image and annotations to a tf.Example proto.
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
coordinates in the official COCO dataset are given as [x, y, width,
height] tuples using absolute coordinates where x, y represent the
top-left (0-indexed) corner. This function converts to the format
expected by the Tensorflow Object Detection API (which is which is
[ymin, xmin, ymax, xmax] with coordinates normalized relative to image
image_dir: directory containing the image files.
category_index: a dict containing COCO category information keyed by the
'id' field of each category. See the label_map_util.create_category_index
include_masks: Whether to include instance segmentations masks
(PNG encoded) in the result. default: False.
keypoint_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
representing part surface coordinates. For more information see
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
num_densepose_annotation_skipped: Number of DensePose annotations that were
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
full_path = os.path.join(image_dir, filename)
with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
key = hashlib.sha256(encoded_jpg).hexdigest()
xmin = []
xmax = []
ymin = []
ymax = []
is_crowd = []
category_names = []
category_ids = []
area = []
encoded_mask_png = []
keypoints_x = []
keypoints_y = []
keypoints_visibility = []
keypoints_name = []
num_keypoints = []
include_keypoint = keypoint_annotations_dict is not None
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
num_annotations_skipped += 1
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
# if include_masks:
# run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
# image_height, image_width)
# binary_mask = mask.decode(run_len_encoding)
# if not object_annotations['iscrowd']:
# binary_mask = np.amax(binary_mask, axis=2)
# pil_image = PIL.Image.fromarray(binary_mask)
# output_io = io.BytesIO()
# pil_image.save(output_io, format='PNG')
# encoded_mask_png.append(output_io.getvalue())
# if include_keypoint:
# annotation_id = object_annotations['id']
# if annotation_id in keypoint_annotations_dict:
# num_keypoint_annotation_used += 1
# keypoint_annotations = keypoint_annotations_dict[annotation_id]
# keypoints = keypoint_annotations['keypoints']
# num_kpts = keypoint_annotations['num_keypoints']
# keypoints_x_abs = keypoints[::3]
# keypoints_x.extend(
# [float(x_abs) / image_width for x_abs in keypoints_x_abs])
# keypoints_y_abs = keypoints[1::3]
# keypoints_y.extend(
# [float(y_abs) / image_height for y_abs in keypoints_y_abs])
# keypoints_visibility.extend(keypoints[2::3])
# keypoints_name.extend(_COCO_KEYPOINT_NAMES)
# num_keypoints.append(num_kpts)
# else:
# keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
# keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
# keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
# keypoints_name.extend(_COCO_KEYPOINT_NAMES)
# num_keypoints.append(0)
# if include_densepose:
# annotation_id = object_annotations['id']
# if (annotation_id in densepose_annotations_dict and
# all(key in densepose_annotations_dict[annotation_id]
# for key in densepose_keys)):
# dp_annotations = densepose_annotations_dict[annotation_id]
# num_densepose_annotation_used += 1
# dp_num_points.append(len(dp_annotations['dp_I']))
# dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
# for i in dp_annotations['dp_I']])
# # DensePose surface coordinates are defined on a [256, 256] grid
# # relative to each instance box (i.e. absolute coordinates in range
# # [0., 256.]). The following converts the coordinates
# # so that they are expressed in normalized image coordinates.
# dp_x_box_rel = [
# clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
# dp_x_norm = [(float(x) + x_box_rel * width) / image_width
# for x_box_rel in dp_x_box_rel]
# dp_y_box_rel = [
# clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
# dp_y_norm = [(float(y) + y_box_rel * height) / image_height
# for y_box_rel in dp_y_box_rel]
# dp_x.extend(dp_x_norm)
# dp_y.extend(dp_y_norm)
# dp_u.extend(dp_annotations['dp_U'])
# dp_v.extend(dp_annotations['dp_V'])
# else:
# dp_num_points.append(0)
# if (remove_non_person_images and
# not any(name == b'person' for name in category_names)):
# return (key, None, num_annotations_skipped,
# num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
# if include_masks:
# feature_dict['image/object/mask'] = (
# dataset_util.bytes_list_feature(encoded_mask_png))
# if include_keypoint:
# feature_dict['image/object/keypoint/x'] = (
# dataset_util.float_list_feature(keypoints_x))
# feature_dict['image/object/keypoint/y'] = (
# dataset_util.float_list_feature(keypoints_y))
# feature_dict['image/object/keypoint/num'] = (
# dataset_util.int64_list_feature(num_keypoints))
# feature_dict['image/object/keypoint/visibility'] = (
# dataset_util.int64_list_feature(keypoints_visibility))
# feature_dict['image/object/keypoint/text'] = (
# dataset_util.bytes_list_feature(keypoints_name))
# num_keypoint_annotation_skipped = (
# len(keypoint_annotations_dict) - num_keypoint_annotation_used)
# if include_densepose:
# feature_dict['image/object/densepose/num'] = (
# dataset_util.int64_list_feature(dp_num_points))
# feature_dict['image/object/densepose/part_index'] = (
# dataset_util.int64_list_feature(dp_part_index))
# feature_dict['image/object/densepose/x'] = (
# dataset_util.float_list_feature(dp_x))
# feature_dict['image/object/densepose/y'] = (
# dataset_util.float_list_feature(dp_y))
# feature_dict['image/object/densepose/u'] = (
# dataset_util.float_list_feature(dp_u))
# feature_dict['image/object/densepose/v'] = (
# dataset_util.float_list_feature(dp_v))
# num_densepose_annotation_skipped = (
# len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
"""Loads COCO annotation json files and converts to tf.Record format.
annotations_file: JSON file containing bounding box annotations.
image_dir: Directory containing the image files.
output_path: Path to output tf.Record file.
include_masks: Whether to include instance segmentations masks
(PNG encoded) in the result. default: False.
num_shards: number of output file shards.
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack, output_path, num_shards)
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
category_index = label_map_util.create_category_index(
annotations_index = {}
if 'annotations' in groundtruth_data:
logging.info('Found groundtruth annotations. Building annotations index.')
for annotation in groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in annotations_index:
annotations_index[image_id] = []
missing_annotation_count = 0
for image in images:
image_id = image['id']
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
logging.info('%d images are missing annotations.',
keypoint_annotations_index = {}
if keypoint_annotations_file:
with tf.gfile.GFile(keypoint_annotations_file, 'r') as kid:
keypoint_groundtruth_data = json.load(kid)
if 'annotations' in keypoint_groundtruth_data:
for annotation in keypoint_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in keypoint_annotations_index:
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
annotations_list = annotations_index[image['id']]
keypoint_annotations_dict = None
if keypoint_annotations_file:
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
densepose_annotations_dict = None
if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
shard_idx = idx % num_shards
if tf_example:
logging.info('Finished writing, skipped %d annotations.',
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
def main(_):
assert FLAGS.train_image_dir, '`train_image_dir` missing.'
assert FLAGS.val_image_dir, '`val_image_dir` missing.'
assert FLAGS.test_image_dir, '`test_image_dir` missing.'
assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'
if not tf.gfile.IsDirectory(FLAGS.output_dir):
train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')
if __name__ == '__main__':
You can leave `iscrowd` and `segmentation` empty if you are doing object detection, classification, or similar tasks. However, you would not be able to make use of the images that are missing bounding boxes for such tasks.
I'm making a Flask app that takes an image as input, processes it, and saves the results in a JSON file, but after processing the image it raises the TypeError mentioned in the title. In addition, it prints only one line and then stops.
Below is my Flask API that I'm using;
def upload_analyze():
if request.method == 'POST':
# check if a file was passed into the POST request
if 'file' not in request.files:
flash('No file was uploaded.')
return redirect(request.url)
f = request.files['file']
filename = secure_filename(f.filename)
image = cv2.imread(filename)
#return 'file uploaded successfully'
# image_file = request.files['image']
clt = KMeans(n_clusters = 3)
dataset = pd.read_csv('bb22.csv')
X = dataset.iloc[:, 1: 8].values
sc = StandardScaler()
global orig , r
# load the image, convert it to grayscale, and blur it slightly
#images = np.array(Image.open(image_file))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
# perform edge detection, then perform a dilation + erosion to
# close gaps in between object edges
edged = cv2.Canny(gray, 50, 100)
edged = cv2.dilate(edged, None, iterations=1)
edged = cv2.erode(edged, None, iterations=1)
# find contours in the edge map
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
# sort the contours from left-to-right and initialize the
# 'pixels per metric' calibration variable
(cnts, _) = contours.sort_contours(cnts)
pixelsPerMetric = None
object_num = 0
objects = []
orig = image.copy()
counter = 0
leng = [0] * 400
width = [0] *400
# loop over the contours individually
for c in cnts:
# if the contour is not sufficiently large, ignore it
if cv2.contourArea(c) < 50:
# compute the rotated bounding box of the contour
box = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
box = np.array(box, dtype="int")
# order the points in the contour such that they appear
# in top-left, top-right, bottom-right, and bottom-left
# order, then draw the outline of the rotated bounding box
box = perspective.order_points(box)
cv2.drawContours(orig, [box.astype("int")], -1, (0, 255, 0), 2)
# unpack the ordered bounding box, then compute the midpoint
# between the top-left and top-right coordinates, followed by
# the midpoint between bottom-left and bottom-right coordinates
(tl, tr, br, bl) = box
(tltrX, tltrY) = midpoint(tl, tr)
(blbrX, blbrY) = midpoint(bl, br)
# compute the midpoint between the top-left and top-right points,
# followed by the midpoint between the top-righ and bottom-right
(tlblX, tlblY) = midpoint(tl, bl)
(trbrX, trbrY) = midpoint(tr, br)
# compute the Euclidean distance between the midpoints
dA = dist.euclidean((tltrX, tltrY), (blbrX, blbrY))
dB = dist.euclidean((tlblX, tlblY), (trbrX, trbrY))
# if the pixels per metric has not been initialized, then
# compute it as the ratio of pixels to supplied metric (in this case, inches)
if pixelsPerMetric is None:
pixelsPerMetric = dB / 22.599 #previously its /22.50
# compute the size of the object
area = round(cv2.contourArea(c) / (pixelsPerMetric**2), 3)
perimeter = round(cv2.arcLength(c, True)/ pixelsPerMetric, 3)
hull = cv2.convexHull(c)
hull_area = round(cv2.contourArea(hull) / (pixelsPerMetric**2), 3)
(x,y),(ma,MA),angle = cv2.fitEllipse(c)
eccentricity = round(np.sqrt(1-(ma/MA)**2),3)
C = round(4*np.pi*area/perimeter**2, 3)
dimA = round(dA / pixelsPerMetric, 3)
dimB = round(dB / pixelsPerMetric, 3)
if (dimA >= dimB):
leng[counter] = str(dimB)
width[counter] = str(dimA)
counter = counter +1
x,y,w,h = cv2.boundingRect(c)
mask = np.zeros(image.shape[:2],np.uint8)
cv2.drawContours(mask, [c],-1, 255, -1)
dst = cv2.bitwise_and(image, image, mask=mask)
# pre-process the image for classification
if len(new_img) == 0:
WB = 0
object_num = object_num+1
image1 = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
image1 = new_img.reshape((image1.shape[0] * new_img.shape[1], 3))
#classify color
count = 0
global dominant_color
dominant_color = [0,0,0]
for (color) in (clt.cluster_centers_):
a = [color.astype("uint8").tolist()[0], color.astype("uint8").tolist()[1],
count = count+1
if(count == 2) and (a != [0, 0, 0]):
dominant_color = a
#prepare image for broken classification
new_img = cv2.resize(new_img, (64, 64))
new_img = new_img.astype("float") / 255.0
new_img = img_to_array(new_img)
new_img = np.expand_dims(new_img, axis=0)
# classify the input image
with graph.as_default():
(yes, no) = model.predict(new_img)[0]
# build the label
if (yes > no):
WB = 0
y_new = "Broken"
if object_num == 1:
WB = 1
X_new = array([[dimA, dimB, area, perimeter, hull_area, eccentricity, C]])
X_new = sc.transform(X_new)
y_new = type_model.predict(X_new)
print("X=%s, Predicted=%s" % (X_new[0], y_new))
obj_num=object_num-1 # because one item on the left most side we have for the pixel constant value
content = {
"Object_number": obj_num,
"Width": dimA,
"Length": dimB,
#"Area": area,
#"Perimeter": perimeter,
#"hull_area": hull_area,
#"eccentricity": eccentricity,
#"compactness": C,
"WB": WB # Whole or Broken
#"Type": str(y_new[0]),
#"color_rgb": dominant_color,
#"color_hex": rgb2hex(dominant_color[2], dominant_color[1], dominant_color[0])
with open('test6.json', 'w') as fout:
json.dump(objects , fout)
return 'ok'
Also in console only this 1 line gets printed:
X=[ 0.38739663 -0.25583995 0.22674784 -0.2933872 0.19980647 -0.03758974
0.4759277 ], Predicted=[4]
I'm returning this message to confirm that the JSON file has been created, but the file does not get created. I can't figure out what is wrong with the return type. Kindly help.
Flask views must return a type that Flask knows how to turn into a response, such as a string, dict, or tuple. You can always convert your return value to one of these types and rebuild the original data from the result on the receiving side.
return { "data": [ { "name": "my name", age: "27" } ] }
User oz19 commented
You need to serialize objects before returning. import json and then json.dumps(objects)
and also
You have a return(objects) at the end of for c in cnts. That could be the problem
So the solution, if you are not using jsonify, is to call json.dumps on the list before returning it.
If you are using this below method, you can easily get required data in json format
# don't forgot to import jsonify
from flask import Flask, request, redirect, jsonify
#app.route('/sample', methods = ['GET', 'POST'])
def sample():
if(request.method == 'GET'): # i am using get you can change whatever you want
data = [{"A": "a",
"B": "b",
"C": "c",
return jsonify({'data': data})
# now you can start your json dumping process here after
Hope it helps!
I'm trying to save both, the depth and color images of the Intel Realsense D435i camera in a list of 300 images. Then I will use multiprocessing to save this chunk of 300 images onto my disk. But every time I try, the program successfully appends 15 images in the list and then I get this error:
Frame didn't arrived within 5000
I made sure I had the 64 bit version on python 3.6 installed and the camera streams perfectly well when I do not try to save the images in a list. The real-sense viewer works great too. I also tried with different resolutions and frame rates but it doesn't seem to work either. What is interesting is if I only save the color images, I will not get the same error, instead I will get the same color image over and over in the list.
if __name__ == '__main__':
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
profile = pipeline.start(config)
depth_sensor = profile.get_device().first_depth_sensor()
rs.option.visual_preset, 3
) # Set high accuracy for depth sensor
depth_scale = depth_sensor.get_depth_scale()
align_to = rs.stream.color
align = rs.align(align_to)
# Init variables
im_count = 0
image_chunk = []
image_chunk2 = []
# sentinel = True
while True:
# Wait for a coherent pair of frames: depth and color
frames = pipeline.wait_for_frames()
aligned_frames = align.process(frames)
aligned_depth_frame = aligned_frames.get_depth_frame()
color_frame = aligned_frames.get_color_frame()
if not aligned_depth_frame or not color_frame:
print("problem here")
raise RuntimeError("Could not acquire depth or color frames.")
depth_image = np.asanyarray(aligned_depth_frame.get_data())
color_image = np.asanyarray(color_frame.get_data())
except Exception as e:
# Stop streaming
I simply need it to save 300 images in a row, that's all, so I am quite troubled as to what is causing this issue.
Holding onto the frame locks the memory, and eventually it hits a limit, which prevents acquiring more images. Even though you are creating an image, the data is still from the frame. You need to clone the image after you create it to release the link to the frame's memory.
depth_image = np.asanyarray(aligned_depth_frame.get_data())
color_image = np.asanyarray(color_frame.get_data())
depth_image = depth_image.copy()
color_image = color_image.copy()
Read more on frames and memory management here:
I created a wrapper class to extract the various elements out of the frame set that can't be recreated later. It's a bit heavy, but shows some common operations that might be helpful for others:
# Module-level processing objects shared by all IntelD435ImagePacket instances.
# The entries that start as None are created on first use inside
# IntelD435ImagePacket.__init__; the point-cloud calculator is created eagerly.
colorizer = None
align_to_depth = None
align_to_color = None
pointcloud = rs.pointcloud()
class IntelD435ImagePacket:
    """Class that contains image and associated processing data.

    Every array is copied out of its librealsense frame during construction,
    so a packet does not keep references into the frame buffers.
    """

    def frame_id(self):
        """Return the frame identifier stored at construction time."""
        return self._frame_id

    def timestamp(self):
        """Return the timestamp stored at construction time."""
        return self._timestamp

    def image_color(self):
        """Return the copied color image."""
        return self._image_color

    def image_depth(self):
        """Return the copied depth image."""
        return self._image_depth

    def image_color_aligned(self):
        """Return the color image aligned to the depth stream."""
        return self._image_color_aligned

    def image_depth_aligned(self):
        """Return the depth image aligned to the color stream."""
        return self._image_depth_aligned

    def image_depth_colorized(self):
        """Return the JET color-mapped depth image, computing it on first use."""
        # Compare against None explicitly: truth-testing a cached
        # multi-element NumPy array raises ValueError.
        if self._image_depth_colorized is None:
            # Read the stored array directly so this does not depend on how
            # the image_depth accessor is exposed.
            # NOTE(review): cv2.applyColorMap expects 8-bit input; the depth
            # image is presumably 16-bit -- confirm whether it must be scaled.
            self._image_depth_colorized = cv2.applyColorMap(
                self._image_depth, cv2.COLORMAP_JET)
        return self._image_depth_colorized

    def intrinsics(self):
        """Return the 3x3 camera matrix built from the stream intrinsics."""
        return self._intrinsics

    def pointcloud(self):
        """Return the copied point-cloud vertex array."""
        return self._pointcloud

    def pointcloud_texture(self):
        """Return the copied point-cloud texture-coordinate array."""
        return self._pointcloud_texture

    def _rs_intrinsics_to_opencv_matrix(self, rs_intrinsics):
        """Build a 3x3 camera matrix from ``fx``, ``fy``, ``ppx`` and ``ppy``.

        Args:
            rs_intrinsics: Object exposing ``fx``, ``fy``, ``ppx``, ``ppy``.

        Returns:
            A ``(3, 3)`` NumPy array ``[[fx, 0, ppx], [0, fy, ppy], [0, 0, 1]]``.
        """
        skew = 0
        return np.array([
            [rs_intrinsics.fx, skew, rs_intrinsics.ppx],
            [0, rs_intrinsics.fy, rs_intrinsics.ppy],
            [0, 0, 1],
        ])

    def __init__(self, frame_set, frame_id=None, timestamp=None, *args, **kwargs):
        """Extract and copy all image data from ``frame_set``.

        Args:
            frame_set: Frame set providing the depth and color frames.
            frame_id: Optional identifier; when None, ``frame_set.frame_number``
                is used.
            timestamp: Optional timestamp; when None, ``frame_set.timestamp``
                is used.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        global colorizer, align_to_depth, align_to_color, pointcloud

        # Create the shared module-level processing objects on first use.
        if colorizer is None:
            colorizer = rs.colorizer()
            colorizer.set_option(rs.option.color_scheme, 0)
        if align_to_depth is None:
            align_to_depth = rs.align(rs.stream.depth)
        if align_to_color is None:
            align_to_color = rs.align(rs.stream.color)
        if pointcloud is None:
            pointcloud = rs.pointcloud()

        # Get intrinsics
        video_stream_profile = frame_set.get_profile().as_video_stream_profile()
        rs_intrinsics = video_stream_profile.get_intrinsics()
        self._intrinsics = self._rs_intrinsics_to_opencv_matrix(rs_intrinsics)

        # Fetch each frame once and reuse it below.
        depth_frame = frame_set.get_depth_frame()
        color_frame = frame_set.get_color_frame()

        # Get pointcloud; view the structured vertices as rows of float32.
        points = pointcloud.calculate(depth_frame)
        vtx = np.asanyarray(points.get_vertices())
        self._pointcloud = vtx.view(np.float32).reshape(vtx.shape + (-1,)).copy()

        # Get pointcloud texture mapping
        tex = np.asanyarray(points.get_texture_coordinates())
        self._pointcloud_texture = (
            tex.view(np.float32).reshape(tex.shape + (-1,)).copy())

        # Extract color and depth images; .copy() detaches them from the frame.
        self._image_color = np.asanyarray(color_frame.get_data()).copy()
        self._image_depth = np.asanyarray(depth_frame.get_data()).copy()

        # Align the color frame to depth frame and extract color image
        color_frame_aligned = align_to_depth.process(frame_set).get_color_frame()
        self._image_color_aligned = np.asanyarray(
            color_frame_aligned.get_data()).copy()

        # Align the depth frame to color frame and extract depth image
        depth_frame_aligned = align_to_color.process(frame_set).get_depth_frame()
        self._image_depth_aligned = np.asanyarray(
            depth_frame_aligned.get_data()).copy()

        # Computed lazily by image_depth_colorized().
        self._image_depth_colorized = None

        # Fall back to the frame set's own values only when the caller gave
        # none; testing "is None" keeps a legitimate 0 from being discarded.
        self._frame_id = frame_set.frame_number if frame_id is None else frame_id
        self._timestamp = frame_set.timestamp if timestamp is None else timestamp