Source code for wbia.algo.smk.smk_pipeline

# -*- coding: utf-8 -*-
r"""
Oxford Experiment:
    wbia TestResult --db Oxford -p smk:nWords=[64000],nAssign=[1],SV=[False],can_match_sameimg=True -a oxford


Zebra Experiment:
    python -m wbia draw_rank_cmc --db GZ_Master1 --show \
        -p :proot=smk,num_words=[64000],fg_on=False,nAssign=[1],SV=[False] \
           :proot=vsmany,fg_on=False,SV=[False] \
        -a ctrl:qmingt=2

    python -m wbia draw_rank_cmc --db PZ_Master1 --show \
        -p :proot=smk,num_words=[64000],fg_on=False,nAssign=[1],SV=[False] \
           :proot=vsmany,fg_on=False,SV=[False] \
        -a ctrl:qmingt=2
"""
import logging
from wbia import dtool
import utool as ut
import numpy as np
from wbia.algo.smk import match_chips5 as mc5
from wbia.algo.smk import vocab_indexer
from wbia.algo.smk import inverted_index
from wbia.algo.smk import smk_funcs
from wbia import core_annots
from wbia.algo import Config as old_config  # NOQA

(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')


[docs]class MatchHeuristicsConfig(dtool.Config): _param_info_list = [ ut.ParamInfo('can_match_self', False), ut.ParamInfo('can_match_samename', True), ut.ParamInfo('can_match_sameimg', False), ]
[docs]class SMKRequestConfig(dtool.Config): """Figure out how to do this""" _param_info_list = [ ut.ParamInfo('proot', 'smk'), ut.ParamInfo('smk_alpha', 3.0), ut.ParamInfo('smk_thresh', 0.0), # ut.ParamInfo('smk_thresh', -1.0), ut.ParamInfo('agg', True), ut.ParamInfo('data_ma', False), # hack for query only multiple assignment ut.ParamInfo( 'word_weight_method', 'idf', shortprefix='wwm' ), # hack for query only multiple assignment ut.ParamInfo('smk_version', 3), ] _sub_config_list = [ core_annots.ChipConfig, core_annots.FeatConfig, old_config.SpatialVerifyConfig, vocab_indexer.VocabConfig, inverted_index.InvertedIndexConfig, MatchHeuristicsConfig, ]
[docs]@ut.reloadable_class class SMKRequest(mc5.EstimatorRequest): r""" qreq_-like object. Trying to work on becoming more scikit-ish CommandLine: python -m wbia.algo.smk.smk_pipeline SMKRequest --profile python -m wbia.algo.smk.smk_pipeline SMKRequest --show python -m wbia draw_rank_cmc --db GZ_ALL --show \ -p :proot=smk,num_words=[64000,4000],nAssign=[1,5],sv_on=[False,True] \ -a ctrl:qmingt=2 python -m wbia draw_rank_cmc --db PZ_MTEST --show \ -p :proot=smk,num_words=[64000,8000,4000],nAssign=[1,2,4],sv_on=[True,False] \ default:proot=vsmany,sv_on=[True,False] \ -a default:qmingt=2 python -m wbia draw_rank_cmc --db PZ_MTEST --show \ -p :proot=smk,num_words=[64000],nAssign=[1],sv_on=[True] \ default:proot=vsmany,sv_on=[True] \ -a default:qmingt=2 python -m wbia draw_rank_cmc --db PZ_Master1 --show \ -p :proot=smk,num_words=[64000],nAssign=[1],sv_on=[False] \ -a ctrl:qmingt=2 python -m wbia draw_rank_cmc --db PZ_Master1 \ -p :proot=smk,num_words=[64000],nAssign=[1],sv_on=[True] \ -a ctrl:qmingt=2,qindex=60:80 --profile python -m wbia draw_rank_cmc --db GZ_ALL \ -p :proot=smk,num_words=[64000],nAssign=[1],sv_on=[True] \ -a ctrl:qmingt=2,qindex=40:60 --profile Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.smk.smk_pipeline import * # NOQA >>> import wbia >>> ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST') >>> qaids = aid_list[0:2] >>> daids = aid_list[:] >>> config = {'nAssign': 2, 'num_words': 64000, 'sv_on': True} >>> qreq_ = SMKRequest(ibs, qaids, daids, config) >>> qreq_.ensure_data() >>> cm_list = qreq_.execute() >>> ut.quit_if_noshow() >>> ut.qtensure() >>> cm_list[0].ishow_analysis(qreq_, fnum=1, viz_name_score=False) >>> cm_list[1].ishow_analysis(qreq_, fnum=2, viz_name_score=False) >>> ut.show_if_requested() """ def __init__(qreq_, ibs=None, qaids=None, daids=None, config=None): super(SMKRequest, qreq_).__init__() if config is None: config = {} qreq_.ibs = ibs qreq_.qaids = qaids qreq_.daids = daids qreq_.config = config # qreq_.vocab = None # qreq_.dinva = None qreq_.qinva = None qreq_.dinva = None qreq_.smk = SMK() # Hack to work with existing hs code qreq_.stack_config = SMKRequestConfig(**config) # Flat config qreq_.qparams = dtool.base.StackedConfig([dict(qreq_.stack_config.parse_items())]) # # TODO: add vocab, inva, features qreq_.cachedir = ut.ensuredir((ibs.cachedir, 'smk'))
[docs] def dump_vectors(qreq_): """ Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.smk.smk_pipeline import * # NOQA >>> import wbia >>> ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST', a='default:mingt=2,pername=2') >>> qaids = aid_list[0:2] >>> daids = aid_list[:] >>> config = {'nAssign': 1, 'num_words': 8000, >>> 'sv_on': True} >>> qreq_ = SMKRequest(ibs, qaids, daids, config) >>> qreq_.ensure_data() """ inva = qreq_.dinva X = qreq_.dinva.get_annot(qreq_.daids[0]) n_words = inva.wx_list[-1] + 1 n_dims = X.agg_rvecs.shape[1] n_annots = len(qreq_.daids) X.agg_rvecs.dtype vlads = np.zeros((n_annots, n_words, n_dims), dtype=np.float32) ids_ = list(zip(qreq_.dnids, qreq_.daids)) for count, (nid, aid) in enumerate(ut.ProgIter(ids_, label='vlad')): # X.rrr() X = qreq_.dinva.get_annot(aid) out = vlads[count] out[X.wx_list] = X.agg_rvecs # X.to_dense(out=out) # Flatten out vlads.shape = (n_annots, n_words * n_dims) ut.print_object_size(vlads) fname = 'vlad_%d_d%d_%s' % (n_annots, n_words * n_dims, qreq_.ibs.get_dbname()) fpath = ut.truepath('~/' + fname + '.mat') import scipy.io mdict = { 'vlads': vlads, 'nids': qreq_.dnids, } scipy.io.savemat(fpath, mdict)
[docs] def ensure_data(qreq_): """ >>> import wbia qreq_ = wbia.testdata_qreq_( defaultdb='Oxford', a='oxford', p='default:proot=smk,nAssign=1,num_words=64000,SV=False,can_match_sameimg=True,dim_size=None') """ logger.info('Ensure data for %s' % (qreq_,)) # qreq_.cachedir = ut.ensuredir((ibs.cachedir, 'smk')) qreq_.ensure_nids() def make_cacher(name, cfgstr=None): if cfgstr is None: cfgstr = ut.hashstr27(qreq_.get_cfgstr()) if False and ut.is_developer(): return ut.Cacher( fname=name + '_' + qreq_.ibs.get_dbname(), cfgstr=cfgstr, cache_dir=ut.ensuredir(ut.truepath('~/Desktop/smkcache')), ) else: wrp = ut.DynStruct() def ensure(func): return func() wrp.ensure = ensure return wrp import copy dconfig = copy.deepcopy(qreq_.qparams) qconfig = qreq_.qparams if qreq_.qparams['data_ma']: # Disable database-dise multi-assignment dconfig['nAssign'] = 1 wwm = qreq_.qparams['word_weight_method'] depc = qreq_.ibs.depc vocab_aids = qreq_.daids cheat = False if cheat: import wbia ut.cprint('CHEATING', 'red') vocab_aids = wbia.init.filter_annots.sample_annots_wrt_ref( qreq_.ibs, qreq_.daids, {'exclude_ref_contact': True}, qreq_.qaids, verbose=1, ) vocab_rowid = depc.get_rowids( 'vocab', (vocab_aids,), config=dconfig, ensure=False )[0] assert vocab_rowid is not None depc = qreq_.ibs.depc dinva_pcfgstr = depc.stacked_config( None, 'inverted_agg_assign', config=dconfig ).get_cfgstr() qinva_pcfgstr = depc.stacked_config( None, 'inverted_agg_assign', config=qconfig ).get_cfgstr() dannot_vuuid = qreq_.ibs.get_annot_hashid_visual_uuid(qreq_.daids).strip('_') qannot_vuuid = qreq_.ibs.get_annot_hashid_visual_uuid(qreq_.qaids).strip('_') tannot_vuuid = dannot_vuuid dannot_suuid = qreq_.ibs.get_annot_hashid_semantic_uuid(qreq_.daids).strip('_') qannot_suuid = qreq_.ibs.get_annot_hashid_semantic_uuid(qreq_.qaids).strip('_') dinva_phashid = ut.hashstr27(dinva_pcfgstr + tannot_vuuid) qinva_phashid = ut.hashstr27(qinva_pcfgstr + tannot_vuuid) dinva_cfgstr = '_'.join([dannot_vuuid, dinva_phashid]) qinva_cfgstr = '_'.join([qannot_vuuid, qinva_phashid]) # vocab = inverted_index.new_load_vocab(ibs, qreq_.daids, config) dinva_cacher = make_cacher('inva', dinva_cfgstr) qinva_cacher = make_cacher('inva', qinva_cfgstr) dwwm_cacher = make_cacher('word_weight', wwm + dinva_cfgstr) gamma_phashid = ut.hashstr27(qreq_.get_pipe_cfgstr() + tannot_vuuid) dgamma_cfgstr = '_'.join([dannot_suuid, gamma_phashid]) qgamma_cfgstr = '_'.join([qannot_suuid, gamma_phashid]) dgamma_cacher = make_cacher('dgamma', cfgstr=dgamma_cfgstr) qgamma_cacher = make_cacher('qgamma', cfgstr=qgamma_cfgstr) dinva = dinva_cacher.ensure( lambda: inverted_index.InvertedAnnots.from_depc( depc, qreq_.daids, vocab_aids, dconfig ) ) qinva = qinva_cacher.ensure( lambda: inverted_index.InvertedAnnots.from_depc( depc, qreq_.qaids, vocab_aids, qconfig ) ) dinva.wx_to_aids = dinva.compute_inverted_list() wx_to_weight = dwwm_cacher.ensure(lambda: dinva.compute_word_weights(wwm)) dinva.wx_to_weight = wx_to_weight qinva.wx_to_weight = wx_to_weight thresh = qreq_.qparams['smk_thresh'] alpha = qreq_.qparams['smk_alpha'] dinva.gamma_list = dgamma_cacher.ensure( lambda: dinva.compute_gammas(alpha, thresh) ) qinva.gamma_list = qgamma_cacher.ensure( lambda: qinva.compute_gammas(alpha, thresh) ) qreq_.qinva = qinva qreq_.dinva = dinva logger.info('loading keypoints') if qreq_.qparams.sv_on: qreq_.data_kpts = qreq_.ibs.get_annot_kpts( qreq_.daids, config2_=qreq_.extern_data_config2 ) logger.info('building aid index') qreq_.daid_to_didx = ut.make_index_lookup(qreq_.daids)
[docs] def execute_pipeline(qreq_): """ >>> from wbia.algo.smk.smk_pipeline import * # NOQA >>> ibs, smk, qreq_ = testdata_smk() >>> cm_list = qreq_.execute() """ smk = qreq_.smk cm_list = smk.predict_matches(qreq_) return cm_list
[docs] def get_qreq_qannot_kpts(qreq_, qaids): return qreq_.ibs.get_annot_kpts(qaids, config2_=qreq_.extern_query_config2)
[docs] def get_qreq_dannot_kpts(qreq_, daids): didx_list = ut.take(qreq_.daid_to_didx, daids) return ut.take(qreq_.data_kpts, didx_list)
# return qreq_.ibs.get_annot_kpts( # daids, config2_=qreq_.extern_data_config2)
[docs]@ut.reloadable_class class SMK(ut.NiceRepr): """ Harness class that controls the execution of the SMK algorithm K(X, Y) = gamma(X) * gamma(Y) * sum([Mc(Xc, Yc) for c in words]) """
[docs] def predict_matches(smk, qreq_, verbose=True): """ >>> from wbia.algo.smk.smk_pipeline import * # NOQA >>> ibs, smk, qreq_ = testdata_smk() >>> verbose = True """ logger.info('Predicting matches') # assert qreq_.qinva.vocab is qreq_.dinva.vocab # X_list = qreq_.qinva.inverted_annots(qreq_.qaids) # Y_list = qreq_.dinva.inverted_annots(qreq_.daids) # verbose = 2 _prog = ut.ProgPartial(lbl='smk query', bs=verbose <= 1, enabled=verbose) daids = np.array(qreq_.daids) cm_list = [ smk.match_single(qaid, daids, qreq_, verbose=verbose > 1) for qaid in _prog(qreq_.qaids) ] return cm_list
[docs] @profile def match_single(smk, qaid, daids, qreq_, verbose=True): """ CommandLine: python -m wbia.algo.smk.smk_pipeline SMK.match_single --profile python -m wbia.algo.smk.smk_pipeline SMK.match_single --show python -m wbia SMK.match_single -a ctrl:qmingt=2 --profile --db PZ_Master1 python -m wbia SMK.match_single -a ctrl --profile --db GZ_ALL Example: >>> # xdoctest: +REQUIRES(--slow) >>> # FUTURE_ENABLE >>> from wbia.algo.smk.smk_pipeline import * # NOQA >>> import wbia >>> qreq_ = wbia.testdata_qreq_(defaultdb='PZ_MTEST') >>> ibs = qreq_.ibs >>> daids = qreq_.daids >>> #ibs, daids = wbia.testdata_aids(defaultdb='PZ_MTEST', default_set='dcfg') >>> qreq_ = SMKRequest(ibs, daids[0:1], daids, {'agg': True, >>> 'num_words': 1000, >>> 'sv_on': True}) >>> qreq_.ensure_data() >>> qaid = qreq_.qaids[0] >>> daids = qreq_.daids >>> daid = daids[1] >>> verbose = True >>> cm = qreq_.smk.match_single(qaid, daids, qreq_) >>> ut.quit_if_noshow() >>> ut.qtensure() >>> cm.ishow_analysis(qreq_) >>> ut.show_if_requested() """ from wbia.algo.hots import chip_match from wbia.algo.hots import pipeline alpha = qreq_.qparams['smk_alpha'] thresh = qreq_.qparams['smk_thresh'] agg = qreq_.qparams['agg'] # nAnnotPerName = qreq_.qparams.nAnnotPerNameSVER sv_on = qreq_.qparams.sv_on if sv_on: nNameShortList = qreq_.qparams.nNameShortlistSVER shortsize = nNameShortList else: shortsize = None X = qreq_.qinva.get_annot(qaid) # Determine which database annotations need to be checked # with ut.Timer('searching qaid=%r' % (qaid,), verbose=verbose): hit_inva_wxs = ut.take(qreq_.dinva.wx_to_aids, X.wx_list) hit_daids = np.array(list(set(ut.iflatten(hit_inva_wxs)))) # Mark impossible daids # with ut.Timer('checking impossible daids=%r' % (qaid,), verbose=verbose): valid_flags = check_can_match(qaid, hit_daids, qreq_) valid_daids = hit_daids.compress(valid_flags) shortlist = ut.Shortlist(shortsize) # gammaX = smk.gamma(X, wx_to_weight, agg, alpha, thresh) _prog = ut.ProgPartial( lbl='smk scoring qaid=%r' % (qaid,), enabled=verbose, bs=True, adjust=True ) wx_to_weight = qreq_.dinva.wx_to_weight debug = False if debug: qnid = qreq_.get_qreq_annot_nids([qaid])[0] daids = np.array(qreq_.daids) dnids = qreq_.get_qreq_annot_nids(daids) correct_aids = daids[np.where(dnids == qnid)[0]] daid = correct_aids[0] if agg: for daid in _prog(valid_daids): Y = qreq_.dinva.get_annot(daid) item = match_kernel_agg(X, Y, wx_to_weight, alpha, thresh) shortlist.insert(item) else: for daid in _prog(valid_daids): Y = qreq_.dinva.get_annot(daid) item = match_kernel_sep(X, Y, wx_to_weight, alpha, thresh) shortlist.insert(item) # Build chipmatches for the shortlist results # with ut.Timer('build cms', verbose=verbose): cm = chip_match.ChipMatch(qaid=qaid, fsv_col_lbls=['smk']) cm.daid_list = [] cm.fm_list = [] cm.fsv_list = [] _prog = ut.ProgPartial( lbl='smk build cm qaid=%r' % (qaid,), enabled=verbose, bs=True, adjust=True ) for item in _prog(shortlist): (score, score_list, Y, X_idx, Y_idx) = item X_fxs = ut.take(X.fxs_list, X_idx) Y_fxs = ut.take(Y.fxs_list, Y_idx) # Only build matches for those that sver will use if agg: X_maws = ut.take(X.maws_list, X_idx) Y_maws = ut.take(Y.maws_list, Y_idx) fm, fs = smk_funcs.build_matches_agg( X_fxs, Y_fxs, X_maws, Y_maws, score_list ) else: fm, fs = smk_funcs.build_matches_sep(X_fxs, Y_fxs, score_list) if len(fm) > 0: # assert not np.any(np.isnan(fs)) daid = Y.aid fsv = fs[:, None] cm.daid_list.append(daid) cm.fm_list.append(fm) cm.fsv_list.append(fsv) cm._update_daid_index() cm.arraycast_self() cm.score_name_maxcsum(qreq_) # if False: # cm.assert_self(qreq_=qreq_, verbose=True) if sv_on: cm = pipeline.sver_single_chipmatch(qreq_, cm, verbose=verbose) cm.score_name_maxcsum(qreq_) return cm
[docs]def word_isect(X, Y, wx_to_weight): isect_words = sorted(X.words.intersection(Y.words)) X_idx = ut.take(X.wx_to_idx, isect_words) Y_idx = ut.take(Y.wx_to_idx, isect_words) weights = np.array(ut.take(wx_to_weight, isect_words)) return X_idx, Y_idx, weights
[docs]def match_kernel_agg(X, Y, wx_to_weight, alpha, thresh): import utool with utool.embed_on_exception_context: gammaXY = X.gamma * Y.gamma # Words in common define matches X_idx, Y_idx, weights = word_isect(X, Y, wx_to_weight) PhisX, flagsX = X.Phis_flags(X_idx) PhisY, flagsY = Y.Phis_flags(Y_idx) score_list = smk_funcs.match_scores_agg( PhisX, PhisY, flagsX, flagsY, alpha, thresh ) norm_weights = weights * gammaXY score_list *= norm_weights score = score_list.sum() item = (score, score_list, Y, X_idx, Y_idx) return item
[docs]def match_kernel_sep(X, Y, wx_to_weight, alpha, thresh): gammaXY = X.gamma * Y.gamma # Words in common define matches X_idx, Y_idx, weights = word_isect(X, Y, wx_to_weight) phisX_list, flagsY_list = X.phis_flags_list(X_idx) phisY_list, flagsX_list = Y.phis_flags_list(Y_idx) scores_list = smk_funcs.match_scores_sep( phisX_list, phisY_list, flagsX_list, flagsY_list, alpha, thresh ) norm_weights = weights * gammaXY for scores, w in zip(scores_list, norm_weights): scores *= w score = [s.sum() for s in scores_list].sum() item = (score, scores_list, Y, X_idx, Y_idx) return item
[docs]def check_can_match(qaid, hit_daids, qreq_): can_match_samename = qreq_.qparams.can_match_samename can_match_sameimg = qreq_.qparams.can_match_sameimg can_match_self = False valid_flags = np.ones(len(hit_daids), dtype=np.bool) # Check that the two annots meet the conditions if not can_match_self: valid_flags[hit_daids == qaid] = False if not can_match_samename: qnid = qreq_.get_qreq_annot_nids([qaid])[0] hit_dnids = qreq_.get_qreq_annot_nids(hit_daids) valid_flags[hit_dnids == qnid] = False if not can_match_sameimg: qgid = qreq_.get_qreq_annot_gids([qaid])[0] hit_dgids = qreq_.get_qreq_annot_gids(hit_daids) valid_flags[hit_dgids == qgid] = False return valid_flags
[docs]def testdata_smk(*args, **kwargs): """ >>> from wbia.algo.smk.smk_pipeline import * # NOQA >>> kwargs = {} """ import wbia import sklearn import sklearn.model_selection # import sklearn.model_selection ibs, aid_list = wbia.testdata_aids(defaultdb='PZ_MTEST') nid_list = np.array(ibs.annots(aid_list).nids) xvalkw = dict(n_splits=4, shuffle=False) skf = sklearn.model_selection.StratifiedKFold(**xvalkw) train_idx, test_idx = next(skf.split(aid_list, nid_list)) daids = ut.take(aid_list, train_idx) qaids = ut.take(aid_list, test_idx) config = { 'num_words': 1000, } config.update(**kwargs) qreq_ = SMKRequest(ibs, qaids, daids, config) smk = qreq_.smk # qreq_ = ibs.new_query_request(qaids, daids, cfgdict={'pipeline_root': 'smk', 'proot': 'smk'}) # qreq_ = ibs.new_query_request(qaids, daids, cfgdict={}) return ibs, smk, qreq_