# -*- coding: utf-8 -*-
"""
Interface to SVM classification of image feature vectors.
"""
import logging
import utool as ut
from os import listdir
from os.path import join, isfile, isdir
(print, rrr, profile) = ut.inject2(__name__, '[svm]')
logger = logging.getLogger('wbia')
VERBOSE_SVM = ut.get_argflag('--verbsvm') or ut.VERBOSE
CONFIG_URL_DICT = {
    # 'localizer-zebra-10' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.10.zip',
    # 'localizer-zebra-20' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.20.zip',
    # 'localizer-zebra-30' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.30.zip',
    # 'localizer-zebra-40' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.40.zip',
    # 'localizer-zebra-50' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.50.zip',
    # 'localizer-zebra-60' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.60.zip',
    # 'localizer-zebra-70' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.70.zip',
    # 'localizer-zebra-80' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.80.zip',
    # 'localizer-zebra-90' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.90.zip',
    # 'localizer-zebra-100' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.localization.zebra.100.zip',
    # 'image-zebra' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.image.zebra.pkl',
    # 'default' : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.image.zebra.pkl',
    # None : 'https://wildbookiarepository.azureedge.net/models/classifier.svm.image.zebra.pkl',
}


def classify_helper(weight_filepath, vector_list, index_list=None, verbose=VERBOSE_SVM):
    if index_list is None:
        index_list = list(range(len(vector_list)))
    # Init score and class holders
    score_dict = {index: [] for index in index_list}
    class_dict = {index: [] for index in index_list}
    # Load model
    model_tup = ut.load_cPkl(weight_filepath, verbose=verbose)
    model, scaler = model_tup
    # Normalize
    vector_list = scaler.transform(vector_list)
    # Calculate decisions and predictions
    # score_list = model.decision_function(vector_list)
    score_list = model.predict_proba(vector_list)
    # Take only the positive probability
    score_list = score_list[:, 1]
    class_list = model.predict(vector_list)
    # Zip together results
    zipped = zip(index_list, score_list, class_list)
    for index, score_, class_ in zipped:
        score_dict[index].append(score_)
        class_dict[index].append(class_)
    # Return scores and classes
    return score_dict, class_dict
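

# A minimal sketch of how a weights file consumed by classify_helper could be
# produced. The (model, scaler) tuple layout is inferred from the
# ut.load_cPkl unpacking above; the scikit-learn estimators, the training
# data, and this helper's name are illustrative assumptions, not part of
# this module.
def _train_svm_sketch(vector_list, label_list, weight_filepath):
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    # Learn the same normalization that classify_helper re-applies via
    # scaler.transform at inference time
    scaler = StandardScaler()
    vector_list = scaler.fit_transform(vector_list)
    # probability=True is required because classify_helper calls
    # model.predict_proba to obtain the positive-class score
    model = SVC(kernel='linear', probability=True)
    model.fit(vector_list, label_list)
    # Persist the (model, scaler) tuple that classify_helper unpacks
    ut.save_cPkl(weight_filepath, (model, scaler))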


def classify(vector_list, weight_filepath, verbose=VERBOSE_SVM, **kwargs):
    """
    Args:
        vector_list (list of ndarray): the feature vectors that need classifying
        weight_filepath (str): path to a pickled (model, scaler) tuple, a
            directory of such pickles (an ensemble), or a shorthand key in
            CONFIG_URL_DICT

    Returns:
        iter: one (score, class) tuple per input vector
    """
    import multiprocessing

    import numpy as np

    # Get correct weight if specified with shorthand
    if weight_filepath in CONFIG_URL_DICT:
        weight_url = CONFIG_URL_DICT[weight_filepath]
        if weight_url.endswith('.zip'):
            weight_filepath = ut.grab_zipped_url(weight_url, appname='wbia')
        else:
            weight_filepath = ut.grab_file_url(
                weight_url, appname='wbia', check_hash=True
            )
    # Get ensemble
    is_ensemble = isdir(weight_filepath)
    if is_ensemble:
        weight_filepath_list = sorted(
            [
                join(weight_filepath, filename)
                for filename in listdir(weight_filepath)
                if isfile(join(weight_filepath, filename))
            ]
        )
    else:
        weight_filepath_list = [weight_filepath]
    num_weights = len(weight_filepath_list)
    assert num_weights > 0
    # Form dictionaries
    num_vectors = len(vector_list)
    index_list = list(range(num_vectors))
    # Generate parallelized wrapper
    OLD = False
    if is_ensemble and OLD:
        vectors_list = [vector_list for _ in range(num_weights)]
        args_list = zip(weight_filepath_list, vectors_list)
        nTasks = num_weights
        logger.info(
            'Processing ensembles in parallel using %d ensembles' % (num_weights,)
        )
    else:
        num_cpus = multiprocessing.cpu_count()
        vector_batch = int(np.ceil(float(num_vectors) / num_cpus))
        vector_rounds = int(np.ceil(float(num_vectors) / vector_batch))
        args_list = []
        for vector_round in range(vector_rounds):
            start_index = vector_round * vector_batch
            stop_index = (vector_round + 1) * vector_batch
            assert start_index < num_vectors
            stop_index = min(stop_index, num_vectors)
            # logger.info('Slicing index range: [%r, %r)' % (start_index, stop_index, ))
            # Slice gids and get feature data
            index_list_ = list(range(start_index, stop_index))
            vector_list_ = vector_list[start_index:stop_index]
            assert len(index_list_) == len(vector_list_)
            for weight_filepath in weight_filepath_list:
                args = (weight_filepath, vector_list_, index_list_)
                args_list.append(args)
        nTasks = len(args_list)
        logger.info(
            'Processing vectors in parallel using vector_batch = %r' % (vector_batch,)
        )
    # Perform inference
    classify_iter = ut.generate2(
        classify_helper, args_list, nTasks=nTasks, ordered=True, force_serial=False
    )
    # Classify with SVM for each image vector
    score_dict = {index: [] for index in index_list}
    class_dict = {index: [] for index in index_list}
    for score_dict_, class_dict_ in classify_iter:
        for index in index_list:
            if index in score_dict_:
                score_dict[index] += score_dict_[index]
            if index in class_dict_:
                class_dict[index] += class_dict_[index]
    # Organize and compute mode and average for class and score
    for index in index_list:
        score_list_ = score_dict[index]
        class_list_ = class_dict[index]
        score_ = sum(score_list_) / len(score_list_)
        class_ = max(set(class_list_), key=class_list_.count)
        class_ = 'positive' if int(class_) == 1 else 'negative'
        yield score_, class_
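

# A minimal usage sketch. The weights path 'classifier.svm.pkl' (or a
# directory of such pickles for an ensemble), the random feature vectors,
# and this helper's name are hypothetical; classify yields one
# (score, class) pair per input vector.
def _classify_sketch():
    import numpy as np

    vector_list = np.random.rand(8, 128)  # eight hypothetical 128-d vectors
    for score_, class_ in classify(vector_list, 'classifier.svm.pkl'):
        # score_ is the mean positive-class probability across the ensemble;
        # class_ is the majority vote mapped to 'positive' / 'negative'
        print(score_, class_)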