# -*- coding: utf-8 -*-
"""
TODO:
    Remove bloat (from multi_index.py as well)

https://github.com/spotify/annoy
"""
import logging
import numpy as np
import utool as ut
import vtool as vt
from vtool._pyflann_backend import pyflann as pyflann

# import itertools as it
import lockfile
from os.path import basename
from wbia.algo.hots import hstypes
from wbia.algo.hots import _pipeline_helpers as plh  # NOQA

(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')

USE_HOTSPOTTER_CACHE = not ut.get_argflag('--nocache-hs')
NOSAVE_FLANN = ut.get_argflag('--nosave-flann')
NOCACHE_FLANN = ut.get_argflag('--nocache-flann') and USE_HOTSPOTTER_CACHE


def get_support_data(qreq_, daid_list):
    """
    CommandLine:
        python -m wbia.algo.hots.neighbor_index get_support_data --show

    Example:
        >>> # xdoctest: +REQUIRES(module:wbia_cnn)
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> import wbia
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='PZ_MTEST', p=':fgw_thresh=.9,maxscale_thresh=10', a=':size=2')
        >>> daid_list = qreq_.daids
        >>> tup = get_support_data(qreq_, daid_list)
        >>> vecs_list, fgws_list, fxs_list = tup
        >>> assert all([np.all(fgws > .9) for fgws in fgws_list])
        >>> result = ('depth_profile = %r' % (ut.depth_profile(tup),))
        >>> print(result)
        depth_profile = [[(128, 128), (174, 128)], [128, 174], [128, 174]]

    I can't figure out why this test isn't deterministic all the time and I
    can't get it to reproduce the non-determinism. This could be due to theano.

        depth_profile = [[(39, 128), (22, 128)], [39, 22], [39, 22]]
        depth_profile = [[(35, 128), (24, 128)], [35, 24], [35, 24]]
        depth_profile = [[(34, 128), (31, 128)], [34, 31], [34, 31]]
        depth_profile = [[(83, 128), (129, 128)], [83, 129], [83, 129]]
        depth_profile = [[(13, 128), (104, 128)], [13, 104], [13, 104]]
    """
    config2_ = qreq_.get_internal_data_config2()
    vecs_list = qreq_.ibs.get_annot_vecs(daid_list, config2_=config2_)
    # Create corresponding feature indices
    fxs_list = [np.arange(len(vecs)) for vecs in vecs_list]
    # <HACK:featweight>
    # hack to get feature weights. returns None if feature weights are turned
    # off in config settings
    if config2_.minscale_thresh is not None or config2_.maxscale_thresh is not None:
        min_ = -np.inf if config2_.minscale_thresh is None else config2_.minscale_thresh
        max_ = np.inf if config2_.maxscale_thresh is None else config2_.maxscale_thresh
        kpts_list = qreq_.ibs.get_annot_kpts(daid_list, config2_=config2_)
        # kpts_list = vt.ziptake(kpts_list, fxs_list, axis=0)  # not needed for first filter
        scales_list = [vt.get_scales(kpts) for kpts in kpts_list]
        # Remove data under the threshold
        flags_list = [
            np.logical_and(scales >= min_, scales <= max_) for scales in scales_list
        ]
        vecs_list = vt.zipcompress(vecs_list, flags_list, axis=0)
        fxs_list = vt.zipcompress(fxs_list, flags_list, axis=0)

    if qreq_.qparams.fg_on:
        # I've found that the call to get_annot_fgweights is different on
        # different machines. Something must be configured differently.
        fgws_list = qreq_.ibs.get_annot_fgweights(
            daid_list, config2_=config2_, ensure=True
        )
        fgws_list = vt.ziptake(fgws_list, fxs_list, axis=0)
        # assert list(map(len, fgws_list)) == list(map(len, vecs_list)), 'bad corresponding vecs'
        if config2_.fgw_thresh is not None and config2_.fgw_thresh > 0:
            flags_list = [fgws > config2_.fgw_thresh for fgws in fgws_list]
            # Remove data under the threshold
            fgws_list = vt.zipcompress(fgws_list, flags_list, axis=0)
            vecs_list = vt.zipcompress(vecs_list, flags_list, axis=0)
            fxs_list = vt.zipcompress(fxs_list, flags_list, axis=0)
    else:
        fgws_list = None
    # </HACK:featweight>
    return vecs_list, fgws_list, fxs_list
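

# A minimal, self-contained sketch (hypothetical helper, not part of the
# upstream API) of the per-annotation threshold filtering above: each annot
# gets its own boolean mask, and vt.zipcompress applies each mask to the
# corresponding array in a list, which ndarray.compress reproduces here.
def _demo_threshold_filter():
    scales_list = [np.array([0.5, 2.0, 12.0]), np.array([3.0, 20.0])]
    fxs_list = [np.arange(3), np.arange(2)]
    min_, max_ = 1.0, 10.0
    flags_list = [np.logical_and(s >= min_, s <= max_) for s in scales_list]
    # Keeps fx 1 from the first annot and fx 0 from the second
    return [fxs.compress(flags) for fxs, flags in zip(fxs_list, flags_list)]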


def invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted information

    Args:
        vecs_list (list):
        fgws_list (list):
        ax_list (list):
        fxs_list (list):
        verbose (bool): verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_fgw, idx2_ax, idx2_fx)

    CommandLine:
        python -m wbia.algo.hots.neighbor_index invert_index

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> rng = np.random.RandomState(42)
        >>> DIM_SIZE = 16
        >>> nFeat_list = [3, 0, 4, 1]
        >>> vecs_list = [rng.randn(nFeat, DIM_SIZE) for nFeat in nFeat_list]
        >>> fgws_list = [rng.randn(nFeat) for nFeat in nFeat_list]
        >>> fxs_list = [np.arange(nFeat) for nFeat in nFeat_list]
        >>> ax_list = np.arange(len(vecs_list))
        >>> fgws_list = None
        >>> verbose = True
        >>> tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(8, 16), 1, 8, 8]

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> import wbia
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', a='default:species=zebra_plains', p='default:fgw_thresh=.999')
        >>> vecs_list, fgws_list, fxs_list = get_support_data(qreq_, qreq_.daids)
        >>> ax_list = np.arange(len(vecs_list))
        >>> input_ = vecs_list, fgws_list, ax_list, fxs_list
        >>> print('input depth_profile = %s' % (ut.depth_profile(input_),))
        >>> tup = invert_index(*input_)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(1912, 128), 1912, 1912, 1912]
    """
    if ut.VERYVERBOSE:
        logger.info('[nnindex] stacking descriptors from %d annotations' % len(ax_list))
    try:
        nFeat_list = np.array(list(map(len, vecs_list)))
        # Remove input without any features
        is_valid = nFeat_list > 0
        nFeat_list = nFeat_list.compress(is_valid)
        vecs_list = ut.compress(vecs_list, is_valid)
        if fgws_list is not None:
            fgws_list = ut.compress(fgws_list, is_valid)
        ax_list = ut.compress(ax_list, is_valid)
        fxs_list = ut.compress(fxs_list, is_valid)
        # Flatten into inverted index
        axs_list = [[ax] * nFeat for (ax, nFeat) in zip(ax_list, nFeat_list)]
        nFeats = sum(nFeat_list)
        idx2_ax = np.fromiter(ut.iflatten(axs_list), np.int32, nFeats)
        idx2_fx = np.fromiter(ut.iflatten(fxs_list), np.int32, nFeats)
        idx2_vec = np.vstack(vecs_list)
        if fgws_list is None:
            idx2_fgw = None
        else:
            idx2_fgw = np.hstack(fgws_list)
            try:
                assert len(idx2_fgw) == len(
                    idx2_vec
                ), 'error. weights and vecs do not correspond'
            except Exception as ex:
                ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')])
                raise
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        logger.info(
            '[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
                nVecs=len(idx2_vec), nAnnots=len(ax_list)
            )
        )
        logger.info(
            '[nnindex] idx2_vecs dtype={}, memory={}'.format(
                idx2_vec.dtype, ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)
            )
        )
    return idx2_vec, idx2_fgw, idx2_ax, idx2_fx
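

# A minimal sketch (hypothetical helper, not part of the upstream API) of
# the layout invert_index produces: row i of the stacked matrix idx2_vec
# maps back to its source annotation via idx2_ax[i] and to the feature
# within that annotation via idx2_fx[i]. Annots with no features are
# dropped before stacking.
def _demo_invert_index_layout():
    vecs_list = [np.zeros((3, 4)), np.ones((0, 4)), np.ones((2, 4))]
    fxs_list = [np.arange(3), np.arange(0), np.arange(2)]
    ax_list = np.arange(len(vecs_list))
    idx2_vec, idx2_fgw, idx2_ax, idx2_fx = invert_index(
        vecs_list, None, ax_list, fxs_list, verbose=False
    )
    # idx2_ax -> [0, 0, 0, 2, 2]; idx2_fx -> [0, 1, 2, 0, 1]; idx2_fgw is None
    return idx2_vec, idx2_ax, idx2_fx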


@ut.reloadable_class
class NeighborIndex(object):
    r"""
    wrapper class around flann
    stores flann index and data it needs to index into

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = testdata_nnindexer()
    """

    ext = '.flann'
    prefix1 = 'flann'

    def __init__(nnindexer, flann_params, cfgstr):
        r"""
        initialize an empty neighbor indexer
        """
        nnindexer.flann = None  # Approximate search structure
        nnindexer.ax2_aid = None  # (A x 1) Mapping to original annot ids
        nnindexer.idx2_vec = None  # (M x D) Descriptors to index
        nnindexer.idx2_fgw = None  # (M x 1) Descriptor foreground weight
        nnindexer.idx2_ax = None  # (M x 1) Index into the aid_list
        nnindexer.idx2_fx = None  # (M x 1) Index into the annot's features
        nnindexer.cfgstr = cfgstr  # configuration id
        if flann_params is None:
            flann_params = {'algorithm': 'kdtree'}
        if 'random_seed' not in flann_params:
            # Make flann deterministic for the same data
            flann_params['random_seed'] = 42
        nnindexer.flann_params = flann_params
        # nprocs = ut.util_parallel.__NUM_PROCS__
        # if nprocs is None:
        #     nprocs = 0
        nprocs = ut.num_cpus()
        nnindexer.cores = flann_params.get('cores', nprocs)
        nnindexer.checks = flann_params.get('checks', 1028)
        nnindexer.num_indexed = None
        nnindexer.flann_fpath = None
        nnindexer.max_distance_sqrd = None  # max possible distance^2 for normalization

    def init_support(indexer, aid_list, vecs_list, fgws_list, fxs_list, verbose=True):
        r"""
        prepares inverted indices and FLANN data structure

        flattens vecs_list and builds a reverse index from the flattened
        indices (idx) to the original aids and fxs
        """
        assert indexer.flann is None, 'already initialized'

        logger.info('[nnindex] Preparing data for indexing / loading index')
        # Check input
        assert len(aid_list) == len(vecs_list), 'invalid input. bad len'
        assert len(aid_list) > 0, (
            'len(aid_list) == 0. ' 'Cannot invert index without features!'
        )
        # Create indexes into the input aids
        ax_list = np.arange(len(aid_list))

        # Invert indices
        tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=verbose)
        idx2_vec, idx2_fgw, idx2_ax, idx2_fx = tup

        ax2_aid = np.array(aid_list)

        indexer.flann = pyflann.FLANN()  # Approximate search structure
        indexer.ax2_aid = ax2_aid  # (A x 1) Mapping to original annot ids
        indexer.idx2_vec = idx2_vec  # (M x D) Descriptors to index
        indexer.idx2_fgw = idx2_fgw  # (M x 1) Descriptor foreground weight
        indexer.idx2_ax = idx2_ax  # (M x 1) Index into the aid_list
        indexer.idx2_fx = idx2_fx  # (M x 1) Index into the annot's features
        indexer.aid2_ax = ut.make_index_lookup(indexer.ax2_aid)
        indexer.num_indexed = indexer.idx2_vec.shape[0]
        if indexer.idx2_vec.dtype == hstypes.VEC_TYPE:
            # these are sift descriptors
            indexer.max_distance_sqrd = hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD
        else:
            # FIXME: hacky way to support siam128 descriptors.
            # raise AssertionError(
            #     'NNindexer should get uint8s right now unless the algorithm has changed')
            indexer.max_distance_sqrd = None

    def add_wbia_support(nnindexer, qreq_, new_daid_list, verbose=ut.NOT_QUIET):
        r"""
        # TODO: ensure that the memcache changes appropriately
        """
        from wbia.algo.hots.neighbor_index_cache import clear_memcache

        clear_memcache()
        if verbose:
            logger.info(
                '[nnindex] request add %d annots to single-indexer'
                % (len(new_daid_list))
            )
        indexed_aids = nnindexer.get_indexed_aids()
        duplicate_aids = set(new_daid_list).intersection(indexed_aids)
        if len(duplicate_aids) > 0:
            if verbose:
                logger.info(
                    (
                        '[nnindex] request has %d annots that are already '
                        'indexed. ignore those'
                    )
                    % (len(duplicate_aids),)
                )
            new_daid_list_ = np.array(sorted(list(set(new_daid_list) - duplicate_aids)))
        else:
            new_daid_list_ = new_daid_list
        if len(new_daid_list_) == 0:
            if verbose:
                logger.info('[nnindex] Nothing to do')
        else:
            tup = get_support_data(qreq_, new_daid_list_)
            new_vecs_list, new_fgws_list, new_fxs_list = tup
            nnindexer.add_support(
                new_daid_list_,
                new_vecs_list,
                new_fgws_list,
                new_fxs_list,
                verbose=verbose,
            )

    def remove_wbia_support(nnindexer, qreq_, remove_daid_list, verbose=ut.NOT_QUIET):
        r"""
        # TODO: ensure that the memcache changes appropriately
        """
        if verbose:
            logger.info(
                '[nnindex] request remove %d annots from single-indexer'
                % (len(remove_daid_list))
            )
        from wbia.algo.hots.neighbor_index_cache import clear_memcache

        clear_memcache()
        nnindexer.remove_support(remove_daid_list, verbose=verbose)

    def remove_support(nnindexer, remove_daid_list, verbose=ut.NOT_QUIET):
        r"""
        CommandLine:
            python -m wbia.algo.hots.neighbor_index --test-remove_support

        SeeAlso:
            ~/code/flann/src/python/pyflann/index.py

        Example:
            >>> # SLOW_DOCTEST
            >>> # xdoctest: +SKIP
            >>> # (IMPORTANT)
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = testdata_nnindexer(use_memcache=False)
            >>> remove_daid_list = [8, 9, 10, 11]
            >>> K = 2
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> # get before data
            >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
            >>> # execute test function
            >>> nnindexer.remove_support(remove_daid_list)
            >>> # test before data vs after data
            >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
            >>> ax2_nvecs = ut.dict_take(ut.dict_hist(nnindexer.idx2_ax), range(len(nnindexer.ax2_aid)))
            >>> assert qfx2_idx2.max() < ax2_nvecs[0], 'should only get points from aid 7'
            >>> assert qfx2_idx1.max() > ax2_nvecs[0], 'should get points from everyone'
        """
        if ut.DEBUG2:
            logger.info('REMOVING POINTS')
        # TODO: ensure no duplicates
        ax2_remove_flag = np.in1d(nnindexer.ax2_aid, remove_daid_list)
        remove_ax_list = np.nonzero(ax2_remove_flag)[0]
        idx2_remove_flag = np.in1d(nnindexer.idx2_ax, remove_ax_list)
        remove_idx_list = np.nonzero(idx2_remove_flag)[0]
        if verbose:
            logger.info(
                '[nnindex] Found %d / %d annots that need removing'
                % (len(remove_ax_list), len(remove_daid_list))
            )
            logger.info(
                '[nnindex] Removing %d indexed features' % (len(remove_idx_list),)
            )
        # FIXME: indices may need adjustment after remove points
        # Currently this is not being done and the data is just being left alone
        # This should be ok temporarily because removed ids should not
        # be returned by the flann object
        nnindexer.flann.remove_points(remove_idx_list)

        # FIXME:
        # nnindexer.ax2_aid
        if True:
            nnindexer.ax2_aid[remove_ax_list] = -1
            nnindexer.idx2_fx[remove_idx_list] = -1
            nnindexer.idx2_vec[remove_idx_list] = 0
            if nnindexer.idx2_fgw is not None:
                nnindexer.idx2_fgw[remove_idx_list] = np.nan
            nnindexer.aid2_ax = ut.make_index_lookup(nnindexer.ax2_aid)

        # FIXME: This will definitely bug out if you remove points and then try
        # to add the same points back again.

        if ut.DEBUG2:
            logger.info('DONE REMOVE POINTS')
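
    # A minimal sketch (illustration only, not part of the class) of the
    # lazy-removal bookkeeping above: removed annots/features are masked
    # with a -1 sentinel instead of being compacted, so existing flann row
    # indices stay valid.
    #
    #     ax2_aid = np.array([7, 8, 9, 10])
    #     remove_flags = np.in1d(ax2_aid, [8, 10])
    #     ax2_aid[np.nonzero(remove_flags)[0]] = -1
    #     # ax2_aid is now [7, -1, 9, -1]; the getters filter out -1 slots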

    def add_support(
        nnindexer,
        new_daid_list,
        new_vecs_list,
        new_fgws_list,
        new_fxs_list,
        verbose=ut.NOT_QUIET,
    ):
        r"""
        adds support data (aka data to be indexed)

        Args:
            new_daid_list (list): list of annotation ids that are being added
            new_vecs_list (list): list of descriptor vectors for each annotation
            new_fgws_list (list): list of weights per vector for each annotation
            new_fxs_list (list): list of feature indices per vector for each annotation
            verbose (bool): verbosity flag (default = True)

        CommandLine:
            python -m wbia.algo.hots.neighbor_index --test-add_support

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = testdata_nnindexer(use_memcache=False)
            >>> new_daid_list = [2, 3, 4]
            >>> K = 2
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> # get before data
            >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
            >>> new_vecs_list, new_fgws_list, new_fxs_list = get_support_data(qreq_, new_daid_list)
            >>> # execute test function
            >>> nnindexer.add_support(new_daid_list, new_vecs_list, new_fgws_list, new_fxs_list)
            >>> # test before data vs after data
            >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
            >>> assert qfx2_idx2.max() > qfx2_idx1.max()
        """
        # TODO: ensure no duplicates
        nAnnots = nnindexer.num_indexed_annots()
        nVecs = nnindexer.num_indexed_vecs()
        nNewAnnots = len(new_daid_list)
        new_ax_list = np.arange(nAnnots, nAnnots + nNewAnnots)
        tup = invert_index(
            new_vecs_list, new_fgws_list, new_ax_list, new_fxs_list, verbose=verbose
        )
        new_idx2_vec, new_idx2_fgw, new_idx2_ax, new_idx2_fx = tup
        nNewVecs = len(new_idx2_vec)
        if verbose or ut.VERYVERBOSE:
            logger.info(
                (
                    '[nnindex] Adding %d vecs from %d annots to nnindex '
                    'with %d vecs and %d annots'
                )
                % (nNewVecs, nNewAnnots, nVecs, nAnnots)
            )
        if ut.DEBUG2:
            logger.info('STACKING')
        # Stack inverted information
        old_idx2_vec = nnindexer.idx2_vec
        if nnindexer.idx2_fgw is not None:
            new_idx2_fgw = np.hstack(new_fgws_list)
            # nnindexer.old_vecs.append(new_idx2_fgw)
        _ax2_aid = np.hstack((nnindexer.ax2_aid, new_daid_list))
        _idx2_ax = np.hstack((nnindexer.idx2_ax, new_idx2_ax))
        _idx2_fx = np.hstack((nnindexer.idx2_fx, new_idx2_fx))
        _idx2_vec = np.vstack((old_idx2_vec, new_idx2_vec))
        if nnindexer.idx2_fgw is not None:
            _idx2_fgw = np.hstack((nnindexer.idx2_fgw, new_idx2_fgw))
        if ut.DEBUG2:
            logger.info('REPLACING')
        nnindexer.ax2_aid = _ax2_aid
        nnindexer.idx2_ax = _idx2_ax
        nnindexer.idx2_vec = _idx2_vec
        nnindexer.idx2_fx = _idx2_fx
        nnindexer.aid2_ax = ut.make_index_lookup(nnindexer.ax2_aid)
        if nnindexer.idx2_fgw is not None:
            nnindexer.idx2_fgw = _idx2_fgw
        # nnindexer.idx2_kpts = None
        # nnindexer.idx2_oris = None
        # Add new points to flann structure
        if ut.DEBUG2:
            logger.info('ADD POINTS (FIXME: SOMETIMES SEGFAULT OCCURS)')
            logger.info('new_idx2_vec.dtype = %r' % new_idx2_vec.dtype)
            logger.info('new_idx2_vec.shape = %r' % (new_idx2_vec.shape,))
        nnindexer.flann.add_points(new_idx2_vec)
        if ut.DEBUG2:
            logger.info('DONE ADD POINTS')

    def ensure_indexer(
        nnindexer,
        cachedir,
        verbose=True,
        force_rebuild=False,
        memtrack=None,
        prog_hook=None,
    ):
        r"""
        Ensures that you get a neighbor indexer. It either loads a cached
        indexer or rebuilds a new one.
        """
        if NOCACHE_FLANN or force_rebuild:
            logger.info('...nnindex flann cache is forced off')
            load_success = False
        else:
            try:
                load_success = nnindexer.load(cachedir, verbose=verbose)
            except Exception:
                load_success = False
        if load_success:
            if not ut.QUIET:
                nVecs = nnindexer.num_indexed_vecs()
                nAnnots = nnindexer.num_indexed_annots()
                logger.info(
                    '...nnindex flann cache hit: %d vectors, %d annots'
                    % (nVecs, nAnnots)
                )
        else:
            if not ut.QUIET:
                nVecs = nnindexer.num_indexed_vecs()
                nAnnots = nnindexer.num_indexed_annots()
                logger.info(
                    '...nnindex flann cache miss: %d vectors, %d annots'
                    % (nVecs, nAnnots)
                )
            if prog_hook is not None:
                prog_hook.set_progress(1, 2, 'Building new indexer (may take some time)')
            nnindexer.build_and_save(cachedir, verbose=verbose, memtrack=memtrack)
        if prog_hook is not None:
            prog_hook.set_progress(2, 2, 'Finished loading indexer')

    def build_and_save(nnindexer, cachedir, verbose=True, memtrack=None):
        nnindexer.reindex(memtrack=memtrack)
        nnindexer.save(cachedir, verbose=verbose)

    def reindex(nnindexer, verbose=True, memtrack=None):
        r"""indexes all vectors with FLANN."""
        num_vecs = nnindexer.num_indexed
        notify_num = 1e6
        verbose_ = ut.VERYVERBOSE or verbose or (not ut.QUIET and num_vecs > notify_num)
        if verbose_:
            logger.info(
                '[nnindex] ...building kdtree over %d points (this may take a sec).'
                % num_vecs
            )
            tt = ut.tic(msg='Building index')
        idx2_vec = nnindexer.idx2_vec
        flann_params = nnindexer.flann_params
        if num_vecs == 0:
            logger.info(
                'WARNING: CANNOT BUILD FLANN INDEX OVER 0 POINTS. '
                'THIS MAY BE A SIGN OF A DEEPER ISSUE'
            )
        else:
            if memtrack is not None:
                memtrack.report('BEFORE BUILD FLANN INDEX')
            nnindexer.flann.build_index(idx2_vec, **flann_params)
            if memtrack is not None:
                memtrack.report('AFTER BUILD FLANN INDEX')
        if verbose_:
            ut.toc(tt)

    # ---- <cachable_interface> ---

    def save(nnindexer, cachedir=None, fpath=None, verbose=True):
        r"""
        Caches a flann neighbor indexer to disk (not the data)
        """
        if NOSAVE_FLANN:
            if ut.VERYVERBOSE or verbose:
                logger.info('[nnindex] flann save is deactivated')
            return False
        if fpath is None:
            flann_fpath = nnindexer.get_fpath(cachedir)
        else:
            flann_fpath = fpath
        nnindexer.flann_fpath = flann_fpath
        if ut.VERYVERBOSE or verbose:
            logger.info(
                '[nnindex] flann.save_index(%r)' % ut.path_ndir_split(flann_fpath, n=5)
            )
        with lockfile.LockFile(flann_fpath):
            nnindexer.flann.save_index(flann_fpath)

    def load(nnindexer, cachedir=None, fpath=None, verbose=True):
        r"""
        Loads a cached flann neighbor indexer from disk (not the data)
        """
        load_success = False
        if fpath is None:
            flann_fpath = nnindexer.get_fpath(cachedir)
        else:
            flann_fpath = fpath
        nnindexer.flann_fpath = flann_fpath
        if ut.checkpath(flann_fpath, verbose=verbose):
            idx2_vec = nnindexer.idx2_vec
            # Warning: Loading a FLANN index with old headers may silently fail.
            with lockfile.LockFile(flann_fpath):
                try:
                    nnindexer.flann.load_index(flann_fpath, idx2_vec)
                except (IOError, pyflann.FLANNException) as ex:
                    ut.printex(ex, '... cannot load nnindex flann', iswarning=True)
                except Exception as ex:
                    ut.printex(ex, '... cannot load nnindex flann', iswarning=True)
                else:
                    load_success = True
        return load_success

    def get_prefix(nnindexer):
        return nnindexer.prefix1

    def get_cfgstr(nnindexer, noquery=False):
        r"""
        returns string which uniquely identifies configuration and support data

        Args:
            noquery (bool): if True cfgstr is only relevant to building the
                index. No search params are returned (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m wbia.algo.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> import wbia
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
        """
        flann_cfgstr_list = []
        use_params_hash = True
        use_data_hash = True
        if use_params_hash:
            flann_defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
            # flann_params_clean = flann_defaults.copy()
            flann_params_clean = ut.sort_dict(flann_defaults)
            ut.update_existing(flann_params_clean, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(flann_params_clean, ['checks'])
            shortnames = dict(
                algorithm='algo', checks='chks', random_seed='seed', trees='t'
            )
            short_params = ut.odict(
                [
                    (shortnames.get(key, key), str(val)[0:7])
                    for key, val in flann_params_clean.items()
                ]
            )
            flann_valsig_ = ut.repr2(short_params, nl=False, explicit=True, strvals=True)
            flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
            # flann_valsig_ = str(list(flann_params.values()))
            # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
            flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
        if use_data_hash:
            vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
            flann_cfgstr_list.append(vecs_hashstr)
        flann_cfgstr = ''.join(flann_cfgstr_list)
        return flann_cfgstr
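
    # The cfgstr above composes the cache key from two parts: a short FLANN
    # parameter signature plus a hash of the stacked vector data, so either
    # a parameter change or a data change invalidates the cache. A hedged
    # sketch of the same idea with plain hashlib (not the actual hashing
    # utool performs):
    #
    #     import hashlib
    #     params_sig = 'algo=kdtree,seed=42,t=8'
    #     data_sig = hashlib.sha1(idx2_vec.tobytes()).hexdigest()[:16]
    #     cache_key = '_FLANN((%s))_VECS(%s)' % (params_sig, data_sig)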

    def get_fname(nnindexer):
        return basename(nnindexer.get_fpath(''))

    def get_fpath(nnindexer, cachedir, cfgstr=None):
        _args2_fpath = ut.util_cache._args2_fpath
        prefix = nnindexer.get_prefix()
        cfgstr = nnindexer.get_cfgstr(noquery=True)
        ext = nnindexer.ext
        fpath = _args2_fpath(cachedir, prefix, cfgstr, ext)
        return fpath

    # ---- </cachable_interface> ---

    def get_dtype(nnindexer):
        return nnindexer.idx2_vec.dtype

    @profile
    def knn(indexer, qfx2_vec, K):
        r"""
        Returns the indices and squared distance to the nearest K neighbors.
        The distance is normalized between zero and one using
        VEC_PSEUDO_MAX_DISTANCE = (np.sqrt(2) * VEC_PSEUDO_MAX)

        Args:
            qfx2_vec : (N x D) an array of N, D-dimensional query vectors

            K: number of approximate nearest neighbors to find

        Returns:
            tuple of (qfx2_idx, qfx2_dist)
                ndarray : qfx2_idx[n][k] (N x K) is the index of the kth
                    approximate nearest data vector w.r.t qfx2_vec[n]

                ndarray : qfx2_dist[n][k] (N x K) is the distance to the kth
                    approximate nearest data vector w.r.t. qfx2_vec[n]
                    distance is normalized squared euclidean distance.

        CommandLine:
            python -m wbia --tf NeighborIndex.knn:0 --debug2
            python -m wbia --tf NeighborIndex.knn:1

        Example:
            >>> # FIXME failing-test (22-Jul-2020) This test is failing and it's not clear how to fix it
            >>> # xdoctest: +SKIP
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> indexer, qreq_, ibs = testdata_nnindexer()
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> K = 2
            >>> indexer.debug_nnindexer()
            >>> assert vt.check_sift_validity(qfx2_vec), 'bad SIFT properties'
            >>> (qfx2_idx, qfx2_dist) = indexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print('qfx2_vec.dtype = %r' % (qfx2_vec.dtype,))
            >>> print('indexer.max_distance_sqrd = %r' % (indexer.max_distance_sqrd,))
            >>> assert np.all(qfx2_dist < 1.0), (
            >>>     'distance should be less than 1. got %r' % (qfx2_dist,))
            >>> # Ensure distance calculations are correct
            >>> qfx2_dvec = indexer.idx2_vec[qfx2_idx.T]
            >>> targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
            >>> rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
            >>> assert np.all(qfx2_dist * indexer.max_distance_sqrd == rawdist), (
            >>>     'inconsistent distance calculations')
            >>> assert np.allclose(targetdist, qfx2_dist), (
            >>>     'inconsistent distance calculations')

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> indexer, qreq_, ibs = testdata_nnindexer()
            >>> qfx2_vec = np.empty((0, 128), dtype=indexer.get_dtype())
            >>> K = 2
            >>> (qfx2_idx, qfx2_dist) = indexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print(result)
            (0, 2) (0, 2)
        """
        if K == 0:
            (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(len(qfx2_vec), 0)
        elif K > indexer.num_indexed:
            # If we want more points than there are in the database
            # FLANN will raise an exception. This corner case
            # will hopefully only be hit if using the multi-indexer
            # so try this workaround which should seamlessly integrate
            # when the multi-indexer stacks the subindexer results.
            # There is a very strong possibility that this will cause errors
            # If this corner case is used in non-multi-indexer code
            K = indexer.num_indexed
            (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(len(qfx2_vec), 0)
        elif len(qfx2_vec) == 0:
            (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(0, K)
        else:
            try:
                # perform nearest neighbors
                (qfx2_idx, qfx2_raw_dist) = indexer.flann.nn_index(
                    qfx2_vec, K, checks=indexer.checks, cores=indexer.cores
                )
                # TODO: catch case where K < dbsize
            except pyflann.FLANNException as ex:
                ut.printex(
                    ex,
                    'probably misread the cached flann_fpath=%r'
                    % (indexer.flann_fpath,),
                )
                # ut.embed()
                # Uncomment and use if the flann index needs to be deleted
                # ibs = ut.search_stack_for_localvar('ibs')
                # cachedir = ibs.get_flann_cachedir()
                # flann_fpath = indexer.get_fpath(cachedir)
                raise
            # Ensure that distances returned are between 0 and 1
            if indexer.max_distance_sqrd is not None:
                qfx2_dist = np.divide(qfx2_raw_dist, indexer.max_distance_sqrd)
            else:
                qfx2_dist = qfx2_raw_dist
            if ut.DEBUG2:
                # Ensure distance calculations are correct
                qfx2_dvec = indexer.idx2_vec[qfx2_idx.T]
                targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
                rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
                assert np.all(
                    qfx2_raw_dist == rawdist
                ), 'inconsistent distance calculations'
                assert np.allclose(
                    targetdist, qfx2_dist
                ), 'inconsistent distance calculations'
        return (qfx2_idx, qfx2_dist)
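
    # A hedged sketch (illustration only, not part of the class) of the
    # normalization in knn: FLANN returns squared L2 distances, which are
    # divided by the maximum possible squared SIFT distance so scores land
    # in [0, 1]. Assuming hstypes.VEC_PSEUDO_MAX is 512 for uint8 SIFT:
    #
    #     max_distance_sqrd = (np.sqrt(2) * 512.0) ** 2  # = 524288.0
    #     qfx2_dist = qfx2_raw_dist / max_distance_sqrd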

    @profile
    def requery_knn(indexer, qfx2_vec, K, pad, impossible_aids, recover=True):
        """
        hack for iccv - this is a highly coupled function

        CommandLine:
            python -m wbia.algo.hots.neighbor_index requery_knn

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> import wbia
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', a='default')
            >>> qreq_.load_indexer()
            >>> indexer = qreq_.indexer
            >>> qannot = qreq_.internal_qannots[1]
            >>> qfx2_vec = qannot.vecs
            >>> K = 3
            >>> pad = 1
            >>> ibs = qreq_.ibs
            >>> qaid = qannot.aid
            >>> impossible_aids = ibs.get_annot_groundtruth(qaid, noself=False)
            >>> impossible_aids = np.array([1, 2, 3, 4, 5])
            >>> qfx2_idx, qfx2_dist = indexer.requery_knn(qfx2_vec, K, pad,
            >>>                                           impossible_aids)
            >>> #indexer.get_nn_axs(qfx2_idx)
            >>> assert np.all(np.diff(qfx2_dist, axis=1) >= 0)
        """
        from wbia.algo.hots import requery_knn

        if K == 0:
            (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(len(qfx2_vec), 0)
        elif K > indexer.num_indexed:
            K = indexer.num_indexed
            (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(len(qfx2_vec), 0)
        elif len(qfx2_vec) == 0:
            (qfx2_idx, qfx2_dist) = indexer.empty_neighbors(0, K)
        else:
            # hack to try and make things a little bit faster
            invalid_axs = np.array(ut.take(indexer.aid2_ax, impossible_aids))
            # pad += (len(invalid_axs) * 2)

            def get_neighbors(vecs, temp_K):
                return indexer.flann.nn_index(
                    vecs, temp_K, checks=indexer.checks, cores=indexer.cores
                )

            get_axs = indexer.get_nn_axs
            try:
                (qfx2_idx, qfx2_raw_dist) = requery_knn.requery_knn(
                    get_neighbors,
                    get_axs,
                    qfx2_vec,
                    num_neighbs=K,
                    pad=pad,
                    invalid_axs=invalid_axs,
                    limit=3,
                    recover=recover,
                )
            except pyflann.FLANNException as ex:
                ut.printex(
                    ex,
                    'probably misread the cached flann_fpath=%r'
                    % (indexer.flann_fpath,),
                )
                raise
            if indexer.max_distance_sqrd is not None:
                qfx2_dist = np.divide(qfx2_raw_dist, indexer.max_distance_sqrd)
            else:
                qfx2_dist = qfx2_raw_dist
        return qfx2_idx, qfx2_dist

    def batch_knn(indexer, vecs, K, chunksize=4096, label='batch knn'):
        """
        Works like `indexer.knn` but the input is split into batches and
        progress is reported to give an estimated time remaining.
        """
        # Preallocate output
        idxs = np.empty((vecs.shape[0], K), dtype=np.int32)
        dists = np.empty((vecs.shape[0], K), dtype=np.float32)
        # Generate chunk slices
        num_chunks = ut.get_num_chunks(vecs.shape[0], chunksize)
        iter_ = ut.ichunk_slices(vecs.shape[0], chunksize)
        prog = ut.ProgIter(iter_, length=num_chunks, label=label)
        for sl_ in prog:
            idxs[sl_], dists[sl_] = indexer.knn(vecs[sl_], K=K)
        return idxs, dists
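
    # Sketch of the chunking assumed above: ut.ichunk_slices is expected to
    # yield contiguous slice objects covering [0, total), roughly:
    #
    #     def _chunk_slices(total, chunksize):
    #         for start in range(0, total, chunksize):
    #             yield slice(start, min(start + chunksize, total))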

    def debug_nnindexer(nnindexer):
        r"""
        Makes sure the indexer has valid SIFT descriptors
        """
        # FIXME: they might not agree if data has been added / removed
        init_data, extra_data = nnindexer.flann.get_indexed_data()
        with ut.Indenter('[NNINDEX_DEBUG]'):
            logger.info('extra_data = %r' % (extra_data,))
            logger.info('init_data = %r' % (init_data,))
            logger.info(
                'nnindexer.max_distance_sqrd = %r' % (nnindexer.max_distance_sqrd,)
            )
            data_agrees = nnindexer.idx2_vec is nnindexer.flann.get_indexed_data()[0]
            if data_agrees:
                logger.info('indexed_data agrees')
            assert vt.check_sift_validity(init_data), 'bad SIFT properties'
            assert data_agrees, 'indexed data does not agree'

    def empty_neighbors(nnindexer, nQfx, K):
        qfx2_idx = np.empty((0, K), dtype=np.int32)
        qfx2_dist = np.empty((0, K), dtype=np.float64)
        return (qfx2_idx, qfx2_dist)

    def num_indexed_vecs(nnindexer):
        return nnindexer.idx2_vec.shape[0]

    def num_indexed_annots(nnindexer):
        # invalid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)
        return (nnindexer.ax2_aid != -1).sum()

    def get_indexed_aids(nnindexer):
        return nnindexer.ax2_aid[nnindexer.ax2_aid != -1]

    def get_indexed_vecs(nnindexer):
        valid_idxs = nnindexer.ax2_aid[nnindexer.idx2_ax] != -1
        valid_idx2_vec = nnindexer.idx2_vec.compress(valid_idxs, axis=0)
        return valid_idx2_vec

    def get_removed_idxs(nnindexer):
        r"""
        __removed_ids = nnindexer.flann._FLANN__removed_ids
        invalid_idxs = nnindexer.get_removed_idxs()
        assert len(np.intersect1d(invalid_idxs, __removed_ids)) == len(__removed_ids)
        """
        invalid_idxs = np.nonzero(nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)[0]
        return invalid_idxs

    def get_nn_vecs(nnindexer, qfx2_nnidx):
        r"""gets matching vectors"""
        return nnindexer.idx2_vec.take(qfx2_nnidx, axis=0)

    def get_nn_axs(nnindexer, qfx2_nnidx):
        r"""gets matching internal annotation indices"""
        return nnindexer.idx2_ax.take(qfx2_nnidx)

    def get_nn_aids(nnindexer, qfx2_nnidx):
        r"""
        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_aid : (N x K) qfx2_aid[n][k] is the annotation id of the
                kth approximate nearest data vector

        CommandLine:
            python -m wbia.algo.hots.neighbor_index --exec-get_nn_aids

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> import wbia
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False,dim_size=450,resize_dim=area')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> qfx2_vec = qreq_.ibs.get_annot_vecs(
            >>>     qreq_.get_internal_qaids()[0],
            >>>     config2_=qreq_.get_internal_query_config2())
            >>> num_neighbors = 4
            >>> (qfx2_nnidx, qfx2_dist) = nnindexer.knn(qfx2_vec, num_neighbors)
            >>> qfx2_aid = nnindexer.get_nn_aids(qfx2_nnidx)
            >>> assert qfx2_aid.shape[1] == num_neighbors
            >>> print('qfx2_aid.shape = %r' % (qfx2_aid.shape,))
            >>> assert qfx2_aid.shape[1] == 4
            >>> ut.assert_inbounds(qfx2_aid.shape[0], 1200, 1300)
        """
        try:
            qfx2_ax = nnindexer.idx2_ax.take(qfx2_nnidx)
            qfx2_aid = nnindexer.ax2_aid.take(qfx2_ax)
        except Exception as ex:
            ut.printex(
                ex,
                'Error occurred in aid lookup. Dumping debug info. '
                'Are the neighbor idxs correct?',
            )
            logger.info('qfx2_nnidx.shape = %r' % (qfx2_nnidx.shape,))
            logger.info('qfx2_nnidx.max() = %r' % (qfx2_nnidx.max(),))
            logger.info('qfx2_nnidx.min() = %r' % (qfx2_nnidx.min(),))
            nnindexer.debug_nnindexer()
            raise
        return qfx2_aid
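
    # The aid lookup above is a two-step take: neighbor row index ->
    # annotation index (idx2_ax) -> annotation id (ax2_aid). A tiny
    # illustration with made-up values:
    #
    #     idx2_ax = np.array([0, 0, 1, 1, 1])
    #     ax2_aid = np.array([101, 102])
    #     qfx2_nnidx = np.array([[0, 4], [2, 3]])
    #     ax2_aid.take(idx2_ax.take(qfx2_nnidx))  # [[101, 102], [102, 102]]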

    def get_nn_featxs(nnindexer, qfx2_nnidx):
        r"""
        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_fx : (N x K) qfx2_fx[n][k] is the feature index (w.r.t the
                source annotation) of the kth approximate nearest data vector
        """
        qfx2_fx = nnindexer.idx2_fx.take(qfx2_nnidx)
        return qfx2_fx

    def get_nn_fgws(nnindexer, qfx2_nnidx):
        r"""
        Gets foreground weights of neighbors

        CommandLine:
            python -m wbia --tf NeighborIndex.get_nn_fgws

        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_fgw : (N x K) qfx2_fgw[n][k] is the foreground weight of
                the kth approximate nearest data vector

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = testdata_nnindexer(dbname='testdb1')
            >>> qfx2_nnidx = np.array([[0, 1, 2], [3, 4, 5]])
            >>> qfx2_fgw = nnindexer.get_nn_fgws(qfx2_nnidx)
        """
        if nnindexer.idx2_fgw is None:
            qfx2_fgw = np.ones(qfx2_nnidx.shape)
        else:
            qfx2_fgw = nnindexer.idx2_fgw.take(qfx2_nnidx)
        return qfx2_fgw

    def get_nn_nids(indexer, qfx2_nnidx, qreq_):
        """iccv hack, todo: make faster by direct lookup from idx"""
        qfx2_aid = indexer.get_nn_aids(qfx2_nnidx)
        qfx2_nid = qreq_.get_qreq_annot_nids(qfx2_aid)
        return qfx2_nid


def in1d_shape(arr1, arr2):
    return np.in1d(arr1, arr2).reshape(arr1.shape)


class NeighborIndex2(NeighborIndex, ut.NiceRepr):
    def __init__(nnindexer, flann_params=None, cfgstr=None):
        super(NeighborIndex2, nnindexer).__init__(flann_params, cfgstr)
        nnindexer.config = None
        # nnindexer.ax2_avuuid = None  # (A x 1) Mapping to original annot uuids

    def __nice__(self):
        return ' nA=%r nV=%r' % (ut.safelen(self.ax2_aid), ut.safelen(self.idx2_vec))

    @staticmethod
    def get_support(depc, aid_list, config):
        vecs_list = depc.get('feat', aid_list, 'vecs', config)
        if False and config['fg_on']:
            fgws_list = depc.get('featweight', aid_list, 'fgw', config)
        else:
            fgws_list = None
        return vecs_list, fgws_list

    def on_load(nnindexer, depc):
        # logger.info('NNINDEX ON LOAD')
        aid_list = nnindexer.ax2_aid
        config = nnindexer.config
        support = nnindexer.get_support(depc, aid_list, config.feat_cfg)
        nnindexer.init_support(aid_list, *support)
        nnindexer.load(fpath=nnindexer.flann_fpath)
        # nnindexer.ax2_aid

    def on_save(nnindexer, depc, fpath):
        # logger.info('NNINDEX ON SAVE')
        # Save FLANN as well
        flann_fpath = ut.augpath(fpath, '_flann', newext='.flann')
        nnindexer.save(fpath=flann_fpath)

    def __getstate__(self):
        # TODO: Figure out how to make these play nice with the depcache
        state = self.__dict__
        # These values are removed before a save to disk
        del state['flann']
        del state['idx2_fgw']
        del state['idx2_vec']
        del state['idx2_ax']
        del state['idx2_fx']
        # del state['flann_params']
        # del state['checks']
        # nnindexer.num_indexed = None
        # nnindexer.flann_fpath = None
        # if flann_params is None:
        #     nnindexer.flann_params = flann_params
        #     nnindexer.cores = flann_params.get('cores', 0)
        #     nnindexer.checks = flann_params.get('checks', 1028)
        # nnindexer.num_indexed = None
        # nnindexer.flann_fpath = None
        # nnindexer.max_distance_sqrd = None  # max possible distance^2 for normalization
        return state

    def __setstate__(self, state_dict):
        self.__dict__.update(state_dict)

# return {}
# def conditional_knn(nnindexer, qfx2_vec, num_neighbors, invalid_axs):
#     """
#     >>> from wbia.algo.hots.neighbor_index import *  # NOQA
#     >>> qreq_ = wbia.testdata_qreq_(defaultdb='seaturtles')
#     >>> qreq_.load_indexer()
#     >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.qaids[0])
#     >>> num_neighbors = 2
#     >>> nnindexer = qreq_.indexer
#     >>> ibs = qreq_.ibs
#     >>> qaid = 1
#     >>> qencid = ibs.get_annot_encounter_text([qaid])[0]
#     >>> ax2_encid = np.array(ibs.get_annot_encounter_text(nnindexer.ax2_aid))
#     >>> invalid_axs = np.where(ax2_encid == qencid)[0]
#     """
#     return conditional_knn_(nnindexer, qfx2_vec, num_neighbors, invalid_axs)


def testdata_nnindexer(*args, **kwargs):
    from wbia.algo.hots.neighbor_index_cache import testdata_nnindexer

    return testdata_nnindexer(*args, **kwargs)