mmtrack.core.evaluation.eval_sot_vot 源代码

# Copyright (c) OpenMMLab. All rights reserved.
# The codes are modified from https://github.com/votchallenge/toolkit/blob/master/vot/analysis/supervised.py # noqa: E501
import numpy as np

try:
    import vot
    from vot.analysis import is_special
    from vot.region import Polygon, Rectangle, Special
    from vot.region import calculate_overlaps as calculate_region_overlaps
except ImportError:
    vot = None


[文档]def bbox2region(bbox):
    """Convert bbox to Rectangle or Polygon Class object.

    Args:
        bbox (ndarray): the format of rectangle bbox is (x1, y1, w, h);
            the format of polygon is (x1, y1, x2, y2, ...).

    Returns:
        Rectangle or Polygon Class object.
    """
    if vot is None:
        raise ImportError(
            'Please run'
            ' pip install git+https://github.com/votchallenge/toolkit.git '
            'to manually install vot-toolkit')

    if len(bbox) == 1:
        return Special(bbox[0])
    elif len(bbox) == 4:
        return Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
    elif len(bbox) % 2 == 0 and len(bbox) > 4:
        return Polygon([(x_, y_) for x_, y_ in zip(bbox[::2], bbox[1::2])])
    else:
        raise NotImplementedError(
            f'The length of bbox is {len(bbox)}, which is not supported')


def trajectory2region(trajectory):
    """Convert bbox trajectory to Rectangle or Polygon Class object trajectory.

    Args:
        trajectory (list[ndarray]): The outer list contains bbox of
            each frame in a video. The bbox is a ndarray.

    Returns:
        List: contains the Region Class object of each frame in a
            trajectory.
    """
    traj_region = []
    for bbox in trajectory:
        traj_region.append(bbox2region(bbox))
    return traj_region


def locate_failures_inits(trajectory):
    """locate the failure frame and initialized frame in a trajectory.

    Args:
        trajectory (list[ndarray]): list of tracking results.

    Returns:
        fail_inds (list): index of failed frame in a trajectory.
        init_inds (list): index of initialized frame in a trajectory.
    """
    fail_inds = []
    init_inds = []
    for i, bbox in enumerate(trajectory):
        if len(bbox) == 1:
            if bbox[0] == 1.:
                init_inds.append(i)
            elif bbox[0] == 2.:
                fail_inds.append(i)
    return fail_inds, init_inds


def count_failures(trajectory):
    """count the number of failed frame in a trajectory.

    Args:
        trajectory (list[ndarray]): list of tracking results.

    Returns:
        List: the number of failed frame in a trajectory.
    """
    num_fails = 0
    for bbox in trajectory:
        if len(bbox) == 1 and bbox[0] == 2.:
            num_fails += 1
    return num_fails


def calc_accuracy(gt_trajectory,
                  pred_trajectory,
                  burnin=10,
                  ignore_unknown=True,
                  video_wh=None):
    """Calculate accuracy over the sequence.

    Args:
        gt_trajectory (list[list]): list of bboxes
        pred_trajectory (list[ndarray]): The outer list contains the
            tracking results of each frame in one video. The ndarray has two
            cases:
                - bbox: denotes the normal tracking box in [x1, y1, w, h]
                    format.
                - special tracking state: [0] denotes the unknown state,
                    namely the skipping frame after failure, [1] denotes the
                    initialized state, and [2] denotes the failed state.
        burnin: number of frames that have to be ignored after the
            re-initialization when calculating accuracy. Default is 10.
        ignore_unknown (bool): whether ignore the skipping frames after
            failures when calculating accuracy. Default is True.
        video_wh: bounding region (width, height)

    Return:
        Float: accuracy over the sequence.
    """
    pred_traj_region = trajectory2region(pred_trajectory)
    gt_traj_region = trajectory2region(gt_trajectory)
    overlaps = np.array(
        calculate_region_overlaps(pred_traj_region, gt_traj_region, video_wh))
    mask = np.ones(len(overlaps), dtype=bool)

    for i, region in enumerate(pred_traj_region):
        if is_special(region, Special.UNKNOWN) and ignore_unknown:
            mask[i] = False
        elif is_special(region, Special.INITIALIZATION):
            for j in range(i, min(len(pred_traj_region), i + burnin)):
                mask[j] = False
        elif is_special(region, Special.FAILURE):
            mask[i] = False
    return np.mean(overlaps[mask]) if any(mask) else 0.


[文档]def eval_sot_accuracy_robustness(results,
                                 annotations,
                                 burnin=10,
                                 ignore_unknown=True,
                                 videos_wh=None):
    """Calculate accuracy and robustness over all tracking sequences.

    Args:
        results (list[list[ndarray]]): The first list contains the
            tracking results of each video. The second list contains the
            tracking results of each frame in one video. The ndarray have two
            cases:
                - bbox: denotes the normal tracking box in [x1, y1, w, h]
                    format.
                - special tracking state: [0] denotes the unknown state,
                    namely the skipping frame after failure, [1] denotes the
                    initialized state, and [2] denotes the failed state.
        annotations (list[ndarray]): The list contains the gt_bboxes of each
            video. The ndarray is gt_bboxes of one video. It's in (N, 4) shape.
            Each bbox is in (x1, y1, w, h) format.
        burnin: number of frames that have to be ignored after the
            re-initialization when calculating accuracy. Default is 10.
        ignore_unknown (bool): whether ignore the skipping frames after
            failures when calculating accuracy. Default is True.
        videos_wh (list[tuple(width, height), ...]): The list contains the
            width and height of each video. Default is None.

    Return:
        dict{str: float}: accuracy and robustness in EAO evaluation metric.
    """
    if vot is None:
        raise ImportError(
            'Please run'
            'pip install git+https://github.com/votchallenge/toolkit.git'
            'to manually install vot-toolkit')

    accuracy = 0
    num_fails = 0
    weight = 0
    for i, (gt_traj, pred_traj) in enumerate(zip(annotations, results)):
        assert len(gt_traj) == len(pred_traj)
        assert len(pred_traj[0]) == 1 and pred_traj[0][0] == 1
        num_fails += count_failures(pred_traj)
        accuracy += calc_accuracy(
            gt_traj,
            pred_traj,
            burnin=burnin,
            ignore_unknown=ignore_unknown,
            video_wh=videos_wh[i]) * len(pred_traj)
        weight += len(pred_traj)

    accuracy /= weight
    robustness = num_fails / weight * 100
    return dict(accuracy=accuracy, robustness=robustness, num_fails=num_fails)


def calc_eao_curve(overlaps, successes):
    """Calculate EAO curve over all tracking sequences.

    Args:
        overlaps (list[list]): The outer list contains the overlaps of each
            video. The inner list contains the overlap of each frame in one
            video.
        successes (list): The list contains the tracking states of last frame
            in each fragment.

    Return:
        ndarray: The N-th element in ndarray denotes the average overlaps from
            1 to N in all fragments.
    """
    max_length = max([len(_) for _ in overlaps])
    total_runs = len(overlaps)

    overlaps_array = np.zeros((total_runs, max_length), dtype=np.float32)
    # mask out frames which are not considered in EAO calculation. initial
    # value are zero, meaning ignored.
    mask = np.zeros((total_runs, max_length), dtype=np.float32)
    for i, (overlap, success) in enumerate(zip(overlaps, successes)):
        overlaps_array[i, :len(overlap)] = np.array(overlap)
        if not success:
            # tracker has failed during this sequence - consider all of
            # 'overlaps_array' and use the default padding from the end of
            # sequence to max length.
            mask[i, :] = 1
        else:
            # tracker has successfully tracked to the end - consider only this
            # part of the true sequence, and ignore the padding from the end of
            # sequence to max length.
            mask[i, :len(overlap)] = 1

    overlaps_array_sum = overlaps_array.copy()
    # overlaps_array_sum[i,j] means the mean overlap from 1 to j in i-th
    # sequence
    for j in range(1, overlaps_array_sum.shape[1]):
        overlaps_array_sum[:, j] = np.mean(overlaps_array[:, 1:j + 1], axis=1)

    return np.sum(overlaps_array_sum * mask, axis=0) / np.sum(mask, axis=0)


[文档]def eval_sot_eao(results, annotations, interval=[100, 356], videos_wh=None):
    """Calculate EAO socre over all tracking sequences.

    Args:
        results (list[list[ndarray]]): The first list contains the
            tracking results of each video. The second list contains the
            tracking results of each frame in one video. The ndarray have two
            cases:
                - bbox: denotes the normal tracking box in [x1, y1, w, h]
                    format.
                - special tracking state: [0] denotes the unknown state,
                    namely the skipping frame after failure, [1] denotes the
                    initialized state, and [2] denotes the failed state.
        annotations (list[ndarray]): The list contains the gt_bboxes of each
            video. The ndarray is gt_bboxes of one video. It's in (N, 4) shape.
            Each bbox is in (x1, y1, w, h) format.
        interval: an specified interval in EAO curve used to calculate the EAO
            score. There are different settings in different VOT challenge.
            Default is VOT2018 setting: [100, 356].
        videos_wh (list[tuple(width, height), ...]): The list contains the
            width and height of each video. Default is None.

    Return:
        dict[str, float]: EAO score in EAO evaluation metric.
    """
    if vot is None:
        raise ImportError(
            'Please run'
            'pip install git+https://github.com/votchallenge/toolkit.git'
            'to manually install vot-toolkit')

    if videos_wh is None:
        videos_wh = [None] * len(annotations)

    all_overlaps = []
    all_successes = []

    for i, (gt_traj, pred_traj) in enumerate(zip(annotations, results)):
        assert len(gt_traj) == len(
            pred_traj), f'{len(gt_traj)} == {len(pred_traj)}'
        # initialized bbox annotation is [1]
        assert len(pred_traj[0]) == 1 and pred_traj[0][
            0] == 1, f'{len(pred_traj[0])} == 1 and {pred_traj[0][0]} == 1'
        fail_inds, init_inds = locate_failures_inits(pred_traj)

        pred_traj = trajectory2region(pred_traj)
        gt_traj = trajectory2region(gt_traj)
        overlaps = calculate_region_overlaps(pred_traj, gt_traj, videos_wh[i])

        if len(fail_inds) > 0:
            for i in range(len(fail_inds)):
                all_overlaps.append(overlaps[init_inds[i]:fail_inds[i]])
                all_successes.append(False)

            # handle last initialization
            if len(init_inds) > len(fail_inds):
                # tracker was initialized, but it has not failed until the end
                # of the sequence
                all_overlaps.append(overlaps[init_inds[-1]:])
                all_successes.append(True)
        else:
            all_overlaps.append(overlaps)
            all_successes.append(True)

    eao_curve = calc_eao_curve(all_overlaps, all_successes)
    eao_score = np.mean(eao_curve[interval[0]:interval[1] + 1])
    eao = dict(eao=eao_score)
    return eao