Working With Video Intelligence API Object Detection in Video Using Python



Working With Video Intelligence API Object Detection in Video Using Python


You can use the Video Intelligence API to detect and track objects in a video.

Copy the following code into your IPython session:

from google.cloud import videointelligence
from google.cloud.videointelligence import enums, types


def track_objects(video_uri, segments=None):
    """Request object tracking for a video and return the operation's result.

    Args:
        video_uri: URI of the video, forwarded as ``input_uri`` to
            ``annotate_video``.
        segments: Optional segments handed to ``types.VideoContext`` to
            restrict the analysis; ``None`` is forwarded unchanged.

    Returns:
        Whatever ``result()`` of the operation started by ``annotate_video``
        returns.
    """
    client = videointelligence.VideoIntelligenceServiceClient()
    requested_features = [enums.Feature.OBJECT_TRACKING]
    video_context = types.VideoContext(segments=segments)

    print(f'Processing video "{video_uri}"...')
    pending = client.annotate_video(
        input_uri=video_uri,
        features=requested_features,
        video_context=video_context,
    )
    # The caller receives the finished result, not the operation handle.
    return pending.result()

Take a moment to study the code and see how it calls the annotate_video client library method with the OBJECT_TRACKING feature to analyze a video and detect objects.

Call the function to analyze the video from seconds 98 to 112:

# gs:// URI of the video to analyze.
video_uri = 'gs://cloudmleap/video/next/JaneGoodall.mp4'
# Describe a single segment running from second 98 to second 112.
segment = types.VideoSegment()
segment.start_time_offset.FromSeconds(98)
segment.end_time_offset.FromSeconds(112)
# Segments are passed as a list; here it holds just this one.
response = track_objects(video_uri, [segment])

Wait a moment for the video to be processed:

Processing video "gs://cloudmleap/video/next/JaneGoodall.mp4"...

Add this function to print out the list of detected objects:

def print_detected_objects(response, min_confidence=.7):
    """Print one summary row per tracked object that is confident enough.

    A dash-padded, 80-column title line is printed first, followed by one row
    per kept annotation: description, entity ID, confidence, segment start
    and end in milliseconds, and the number of frames.

    Args:
        response: Annotation response; only ``annotation_results[0]`` is
            read, since a single video is processed.
        min_confidence: Annotations with a lower confidence are dropped.
    """
    first_result = response.annotation_results[0]
    kept = [
        obj
        for obj in first_result.object_annotations
        if min_confidence <= obj.confidence
    ]

    title = (f' Detected objects: {len(kept)}'
             f' ({min_confidence:.0%} <= confidence) ')
    print(title.center(80, '-'))

    for obj in kept:
        span = obj.segment
        columns = (
            f'{obj.entity.description:<22}',
            f'{obj.entity.entity_id:<10}',
            f'{obj.confidence:4.0%}',
            f'{span.start_time_offset.ToMilliseconds():>7,}',
            f'{span.end_time_offset.ToMilliseconds():>7,}',
            f'{len(obj.frames):>2} fr.',
        )
        print(' | '.join(columns))
  

Call the function:

# No threshold is passed, so the default min_confidence of .7 applies.
print_detected_objects(response)

You should see something like this:

------------------- Detected objects: 3 (70% <= confidence) --------------------
insect                 | /m/03vt0   |  87% |  98,840 | 101,720 | 25 fr.
insect                 | /m/03vt0   |  71% | 108,440 | 111,080 | 23 fr.
butterfly              | /m/0cyf8   |  91% | 111,200 | 111,920 |  7 fr.

Add this function to print out the list of detected object frames and bounding boxes:

def print_object_frames(response, entity_id, min_confidence=.7):
    """Print every frame and bounding box of the matching tracked objects.

    For each kept annotation, a dash-padded, 80-column banner is printed,
    followed by one row per frame: time offset in milliseconds, the
    (left, top) corner and the (right, bottom) corner of the normalized
    bounding box.

    Args:
        response: Annotation response; only ``annotation_results[0]`` is
            read, since a single video is processed.
        entity_id: Only annotations whose entity has this ID are printed.
        min_confidence: Annotations with a lower confidence are dropped.
    """
    first_result = response.annotation_results[0]
    # Select everything up front so that filtering completes before printing.
    selected = [
        obj
        for obj in first_result.object_annotations
        if obj.entity.entity_id == entity_id
        and min_confidence <= obj.confidence
    ]

    for obj in selected:
        banner = (f' {obj.entity.description},'
                  f' confidence: {obj.confidence:.0%},'
                  f' frames: {len(obj.frames)} ')
        print(banner.center(80, '-'))

        for frame in obj.frames:
            offset_ms = frame.time_offset.ToMilliseconds()
            bbox = frame.normalized_bounding_box
            row = (
                f'{offset_ms:>7,}',
                f'({bbox.left:.5f}, {bbox.top:.5f})',
                f'({bbox.right:.5f}, {bbox.bottom:.5f})',
            )
            print(' | '.join(row))
  
Call the function with the entity ID for insects:


# '/m/03vt0' is the entity ID shown for "insect" in the detected-objects listing.
print_object_frames(response, '/m/03vt0')

You should see something like this:

--------------------- insect, confidence: 87%, frames: 25 ----------------------
 98,840 | (0.49327, 0.19617) | (0.69905, 0.69633)
 98,960 | (0.49559, 0.19308) | (0.70631, 0.69671)
...
101,600 | (0.46668, 0.19776) | (0.76619, 0.69371)
101,720 | (0.46805, 0.20053) | (0.76447, 0.68703)
--------------------- insect, confidence: 71%, frames: 23 ----------------------
108,440 | (0.47343, 0.10694) | (0.63821, 0.98332)
108,560 | (0.46960, 0.10206) | (0.63033, 0.98285)
...
110,960 | (0.49466, 0.05102) | (0.65941, 0.99357)
111,080 | (0.49572, 0.04728) | (0.65762, 0.99868)

Comments