# -*- coding: utf-8 -*-
"""
Interface to SVM classification of image feature vectors.
"""
import logging
import utool as ut
from os import listdir
from os.path import join, isfile, isdir
(print, rrr, profile) = ut.inject2(__name__, '[svm]')
logger = logging.getLogger('wbia')
VERBOSE_SVM = ut.get_argflag('--verbsvm') or ut.VERBOSE
CONFIG_URL_DICT = {
    # 'localizer-zebra-10' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.10.zip',
    # 'localizer-zebra-20' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.20.zip',
    # 'localizer-zebra-30' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.30.zip',
    # 'localizer-zebra-40' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.40.zip',
    # 'localizer-zebra-50' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.50.zip',
    # 'localizer-zebra-60' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.60.zip',
    # 'localizer-zebra-70' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.70.zip',
    # 'localizer-zebra-80' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.80.zip',
    # 'localizer-zebra-90' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.90.zip',
    # 'localizer-zebra-100' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.100.zip',
    # 'image-zebra' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.image.zebra.pkl',
    # 'default' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.image.zebra.pkl',
    # None : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.image.zebra.pkl',
}


def classify_helper(weight_filepath, vector_list, index_list=None, verbose=VERBOSE_SVM):
    if index_list is None:
        index_list = list(range(len(vector_list)))
    # Init score and class holders
    score_dict = {index: [] for index in index_list}
    class_dict = {index: [] for index in index_list}
    # Load model
    model_tup = ut.load_cPkl(weight_filepath, verbose=verbose)
    model, scaler = model_tup
    # Normalize
    vector_list = scaler.transform(vector_list)
    # Calculate decisions and predictions
    # score_list = model.decision_function(vector_list)
    score_list = model.predict_proba(vector_list)
    # Take only the positive probability
    score_list = score_list[:, 1]
    class_list = model.predict(vector_list)
    # Zip together results
    zipped = zip(index_list, score_list, class_list)
    for index, score_, class_ in zipped:
        score_dict[index].append(score_)
        class_dict[index].append(class_)
    # Return scores and classes
    return score_dict, class_dict
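

# A minimal sketch of how a weights file consumed by classify_helper could be
# produced. The (model, scaler) tuple layout is inferred from the
# ut.load_cPkl unpacking above; the scikit-learn estimators, the training
# data, and this helper's name are illustrative assumptions, not part of
# this module.
def _train_svm_sketch(vector_list, label_list, weight_filepath):
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    # Learn the same normalization that classify_helper re-applies via
    # scaler.transform at inference time
    scaler = StandardScaler()
    vector_list = scaler.fit_transform(vector_list)
    # probability=True is required because classify_helper calls
    # model.predict_proba to obtain the positive-class score
    model = SVC(kernel='linear', probability=True)
    model.fit(vector_list, label_list)
    # Persist the (model, scaler) tuple that classify_helper unpacks
    ut.save_cPkl(weight_filepath, (model, scaler))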


def classify(vector_list, weight_filepath, verbose=VERBOSE_SVM, **kwargs):
    """
    Args:
        vector_list (list of ndarray): the feature vectors that need classifying
        weight_filepath (str): path to a pickled (model, scaler) tuple, a
            directory of such pickles (an ensemble), or a shorthand key in
            CONFIG_URL_DICT

    Returns:
        iter: one (score, class) tuple per input vector
    """
    import multiprocessing

    import numpy as np

    # Get correct weight if specified with shorthand
    if weight_filepath in CONFIG_URL_DICT:
        weight_url = CONFIG_URL_DICT[weight_filepath]
        if weight_url.endswith('.zip'):
            weight_filepath = ut.grab_zipped_url(weight_url, appname='wbia')
        else:
            weight_filepath = ut.grab_file_url(
                weight_url, appname='wbia', check_hash=True
            )
    # Get ensemble
    is_ensemble = isdir(weight_filepath)
    if is_ensemble:
        weight_filepath_list = sorted(
            [
                join(weight_filepath, filename)
                for filename in listdir(weight_filepath)
                if isfile(join(weight_filepath, filename))
            ]
        )
    else:
        weight_filepath_list = [weight_filepath]
    num_weights = len(weight_filepath_list)
    assert num_weights > 0
    # Form dictionaries
    num_vectors = len(vector_list)
    index_list = list(range(num_vectors))
    # Generate parallelized wrapper
    OLD = False
    if is_ensemble and OLD:
        vectors_list = [vector_list for _ in range(num_weights)]
        args_list = zip(weight_filepath_list, vectors_list)
        nTasks = num_weights
        logger.info(
            'Processing ensembles in parallel using %d ensembles' % (num_weights,)
        )
    else:
        num_cpus = multiprocessing.cpu_count()
        vector_batch = int(np.ceil(float(num_vectors) / num_cpus))
        vector_rounds = int(np.ceil(float(num_vectors) / vector_batch))
        args_list = []
        for vector_round in range(vector_rounds):
            start_index = vector_round * vector_batch
            stop_index = (vector_round + 1) * vector_batch
            assert start_index < num_vectors
            stop_index = min(stop_index, num_vectors)
            # logger.info('Slicing index range: [%r, %r)' % (start_index, stop_index, ))
            # Slice gids and get feature data
            index_list_ = list(range(start_index, stop_index))
            vector_list_ = vector_list[start_index:stop_index]
            assert len(index_list_) == len(vector_list_)
            for weight_filepath in weight_filepath_list:
                args = (weight_filepath, vector_list_, index_list_)
                args_list.append(args)
        nTasks = len(args_list)
        logger.info(
            'Processing vectors in parallel using vector_batch = %r' % (vector_batch,)
        )
    # Perform inference
    classify_iter = ut.generate2(
        classify_helper, args_list, nTasks=nTasks, ordered=True, force_serial=False
    )
    # Classify with SVM for each image vector
    score_dict = {index: [] for index in index_list}
    class_dict = {index: [] for index in index_list}
    for score_dict_, class_dict_ in classify_iter:
        for index in index_list:
            if index in score_dict_:
                score_dict[index] += score_dict_[index]
            if index in class_dict_:
                class_dict[index] += class_dict_[index]
    # Organize and compute mode and average for class and score
    for index in index_list:
        score_list_ = score_dict[index]
        class_list_ = class_dict[index]
        score_ = sum(score_list_) / len(score_list_)
        class_ = max(set(class_list_), key=class_list_.count)
        class_ = 'positive' if int(class_) == 1 else 'negative'
        yield score_, class_
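

# A minimal usage sketch. The weights path 'classifier.svm.pkl' (or a
# directory of such pickles for an ensemble), the random feature vectors,
# and this helper's name are hypothetical; classify yields one
# (score, class) pair per input vector.
def _classify_sketch():
    import numpy as np

    vector_list = np.random.rand(8, 128)  # eight hypothetical 128-d vectors
    for score_, class_ in classify(vector_list, 'classifier.svm.pkl'):
        # score_ is the mean positive-class probability across the ensemble;
        # class_ is the majority vote mapped to 'positive' / 'negative'
        print(score_, class_)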