# -*- coding: utf-8 -*-
"""
NEEDS CLEANUP
"""
import logging
from os.path import join
import utool as ut
from wbia.algo.hots import _pipeline_helpers as plh # NOQA
from wbia.algo.hots.neighbor_index import NeighborIndex, get_support_data
(print, rrr, profile) = ut.inject2(__name__)
logger = logging.getLogger('wbia')
USE_HOTSPOTTER_CACHE = not ut.get_argflag('--nocache-hs')
NOCACHE_UUIDS = ut.get_argflag('--nocache-uuids') and USE_HOTSPOTTER_CACHE
# LRU cache for nn_indexers. Ensures that only a few are ever in memory
# MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=2)
MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=1)
# Background process for building indexes
CURRENT_THREAD = None
# Global map to keep track of UUID lists with prebuild indexers.
UUID_MAP = ut.ddict(dict)
NEIGHBOR_CACHE = ut.get_lru_cache(MAX_NEIGHBOR_CACHE_SIZE)
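# A request typically resolves through three tiers, cheapest first. A rough
# sketch of the flow implemented below (not an exhaustive description):
#
#   request_wbia_nnindexer(qreq_)
#     -> request_memcached_wbia_nnindexer(...)      # hit NEIGHBOR_CACHE (LRU)
#       -> request_diskcached_wbia_nnindexer(...)   # load flann from cachedir
#         -> new_neighbor_index(...)                # build from support data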
class UUIDMapHyrbridCache(object):
    """
    Class that lets multiple ways of writing to the uuid_map
    be swapped in and out interchangeably

    TODO: the global read / write should periodically sync itself to disk,
    and it should be loaded from disk initially
    """
def __init__(self):
self.uuid_maps = ut.ddict(dict)
# self.uuid_map_fpath = uuid_map_fpath
# self.init(uuid_map_fpath, min_reindex_thresh)
    def init(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
# self.read_func = self.read_uuid_map_cpkl
# self.write_func = self.write_uuid_map_cpkl
self.read_func = self.read_uuid_map_dict
self.write_func = self.write_uuid_map_dict
    def dump(self, cachedir):
# TODO: DUMP AND LOAD THIS HYBRID CACHE TO DISK
# write_uuid_map_cpkl
fname = 'uuid_maps_hybrid_cache.cPkl'
cpkl_fpath = join(cachedir, fname)
ut.lock_and_save_cPkl(cpkl_fpath, self.uuid_maps)
    def load(self, cachedir):
        """
        Loads a cached uuid map from disk
        """
fname = 'uuid_maps_hybrid_cache.cPkl'
cpkl_fpath = join(cachedir, fname)
self.uuid_maps = ut.lock_and_load_cPkl(cpkl_fpath)
# def __call__(self):
# return self.read_func(*self.args, **self.kwargs)
# def __setitem__(self, daids_hashid, visual_uuid_list):
# uuid_map_fpath = self.uuid_map_fpath
# self.write_func(uuid_map_fpath, visual_uuid_list, daids_hashid)
# @profile
# def read_uuid_map_shelf(self, uuid_map_fpath, min_reindex_thresh):
# #with ut.EmbedOnException():
# with lockfile.LockFile(uuid_map_fpath + '.lock'):
# with ut.shelf_open(uuid_map_fpath) as uuid_map:
# candidate_uuids = {
# key: val for key, val in uuid_map.items()
# if len(val) >= min_reindex_thresh
# }
# return candidate_uuids
# @profile
# def write_uuid_map_shelf(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
# logger.info('Writing %d visual uuids to uuid map' % (len(visual_uuid_list)))
# with lockfile.LockFile(uuid_map_fpath + '.lock'):
# with ut.shelf_open(uuid_map_fpath) as uuid_map:
# uuid_map[daids_hashid] = visual_uuid_list
# @profile
# def read_uuid_map_cpkl(self, uuid_map_fpath, min_reindex_thresh):
# with lockfile.LockFile(uuid_map_fpath + '.lock'):
# #with ut.shelf_open(uuid_map_fpath) as uuid_map:
# try:
# uuid_map = ut.load_cPkl(uuid_map_fpath)
# candidate_uuids = {
# key: val for key, val in uuid_map.items()
# if len(val) >= min_reindex_thresh
# }
# except IOError:
# return {}
# return candidate_uuids
# @profile
# def write_uuid_map_cpkl(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
# """
    #     let the multi-indexer know about any big caches we've made.
    #     Also lets nnindexer know about other prebuilt indexers so it can
    #     attempt to just add points to them to avoid a rebuild.
# """
# logger.info('Writing %d visual uuids to uuid map' % (len(visual_uuid_list)))
# with lockfile.LockFile(uuid_map_fpath + '.lock'):
# try:
# uuid_map = ut.load_cPkl(uuid_map_fpath)
# except IOError:
# uuid_map = {}
# uuid_map[daids_hashid] = visual_uuid_list
# ut.save_cPkl(uuid_map_fpath, uuid_map)
    @profile
def read_uuid_map_dict(self, uuid_map_fpath, min_reindex_thresh):
"""uses in memory dictionary instead of disk"""
uuid_map = self.uuid_maps[uuid_map_fpath]
candidate_uuids = {
key: val for key, val in uuid_map.items() if len(val) >= min_reindex_thresh
}
return candidate_uuids
    @profile
    def write_uuid_map_dict(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
        """
        uses in memory dictionary instead of disk

        Lets the multi-indexer know about any big caches we've made. Also lets
        nnindexer know about other prebuilt indexers so it can attempt to just
        add points to them to avoid a rebuild.
        """
if NOCACHE_UUIDS:
logger.info('uuid cache is off')
return
# with ut.EmbedOnException():
uuid_map = self.uuid_maps[uuid_map_fpath]
uuid_map[daids_hashid] = visual_uuid_list
UUID_MAP_CACHE = UUIDMapHyrbridCache()
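# Minimal round-trip sketch for the hybrid uuid map (in-memory only, and
# assuming --nocache-uuids is not passed; the fpath argument is just a
# dictionary key here, and 'example.cPkl' is a hypothetical name, not a real
# cache file):
#
#   >>> _cache = UUIDMapHyrbridCache()
#   >>> _cache.write_uuid_map_dict('example.cPkl', ['vuuid1', 'vuuid2'], 'hashid')
#   >>> _cache.read_uuid_map_dict('example.cPkl', min_reindex_thresh=1)
#   {'hashid': ['vuuid1', 'vuuid2']}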
# @profile
def get_nnindexer_uuid_map_fpath(qreq_):
"""
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache get_nnindexer_uuid_map_fpath
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fgw_thresh=.3')
>>> uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
>>> result = str(ut.path_ndir_split(uuid_map_fpath, 3))
>>> print(result)
.../_wbia_cache/flann/uuid_map_mzwwsbjisbkdxorl.cPkl
.../_wbia_cache/flann/uuid_map_FLANN(8_kdtrees_fgwthrsh=0.3)_Feat(hesaff+sift)_Chip(sz700,width).cPkl
.../_wbia_cache/flann/uuid_map_FLANN(8_kdtrees)_Feat(hesaff+sift)_Chip(sz700,width).cPkl
.../_wbia_cache/flann/uuid_map_FLANN(8_kdtrees)_FEAT(hesaff+sift_)_CHIP(sz450).cPkl
"""
flann_cachedir = qreq_.ibs.get_flann_cachedir()
# Have uuid shelf conditioned on the baseline flann and feature parameters
flann_cfgstr = qreq_.qparams.flann_cfgstr
feat_cfgstr = qreq_.qparams.feat_cfgstr
chip_cfgstr = qreq_.qparams.chip_cfgstr
featweight_cfgstr = qreq_.qparams.featweight_cfgstr
if qreq_.qparams.fgw_thresh is None or qreq_.qparams.fgw_thresh == 0:
uuid_map_cfgstr = ''.join((flann_cfgstr, feat_cfgstr, chip_cfgstr))
else:
uuid_map_cfgstr = ''.join(
(flann_cfgstr, featweight_cfgstr, feat_cfgstr, chip_cfgstr)
)
# uuid_map_ext = '.shelf'
uuid_map_ext = '.cPkl'
uuid_map_prefix = 'uuid_map'
uuid_map_fname = ut.consensed_cfgstr(uuid_map_prefix, uuid_map_cfgstr) + uuid_map_ext
uuid_map_fpath = join(flann_cachedir, uuid_map_fname)
return uuid_map_fpath
def build_nnindex_cfgstr(qreq_, daid_list):
    """
    Builds a string that uniquely identifies an indexer built with parameters
    from the input query request and indexing descriptors from the input
    annotation ids
Args:
qreq_ (QueryRequest): query request object with hyper-parameters
daid_list (list):
Returns:
str: nnindex_cfgstr
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-build_nnindex_cfgstr
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> ibs = wbia.opendb(db='testdb1')
>>> daid_list = ibs.get_valid_aids(species=wbia.const.TEST_SPECIES.ZEB_PLAIN)
>>> qreq_ = ibs.new_query_request(daid_list, daid_list, cfgdict=dict(fg_on=False))
>>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
>>> result = str(nnindex_cfgstr)
>>> print(result)
_VUUIDS((6)ylydksaqdigdecdd)_FLANN(8_kdtrees)_FeatureWeight(detector=cnn,sz256,thresh=20,ksz=20,enabled=False)_FeatureWeight(detector=cnn,sz256,thresh=20,ksz=20,enabled=False)
_VUUIDS((6)ylydksaqdigdecdd)_FLANN(8_kdtrees)_FEATWEIGHT(OFF)_FEAT(hesaff+sift_)_CHIP(sz450)
"""
flann_cfgstr = qreq_.qparams.flann_cfgstr
featweight_cfgstr = qreq_.qparams.featweight_cfgstr
feat_cfgstr = qreq_.qparams.feat_cfgstr
chip_cfgstr = qreq_.qparams.chip_cfgstr
    # FIXME: need to include probchip (or better yet just use depcache)
# probchip_cfgstr = qreq_.qparams.chip_cfgstr
data_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
nnindex_cfgstr = ''.join(
(data_hashid, flann_cfgstr, featweight_cfgstr, feat_cfgstr, chip_cfgstr)
)
return nnindex_cfgstr
def clear_memcache():
global NEIGHBOR_CACHE
NEIGHBOR_CACHE.clear()
def clear_uuid_cache(qreq_):
"""
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-clear_uuid_cache
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=True')
>>> fgws_list = clear_uuid_cache(qreq_)
>>> result = str(fgws_list)
>>> print(result)
"""
logger.info('[nnindex] clearing uuid cache')
uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
ut.delete(uuid_map_fpath)
ut.delete(uuid_map_fpath + '.lock')
logger.info('[nnindex] finished uuid cache clear')
def print_uuid_cache(qreq_):
"""
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-print_uuid_cache
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> qreq_ = wbia.testdata_qreq_(defaultdb='PZ_Master0', p='default:fg_on=False')
>>> print_uuid_cache(qreq_)
>>> result = str(nnindexer)
>>> print(result)
"""
    logger.info('[nnindex] printing uuid cache')
uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
candidate_uuids = UUID_MAP_CACHE.read_uuid_map_dict(uuid_map_fpath, 0)
logger.info(candidate_uuids)
def request_wbia_nnindexer(qreq_, verbose=True, **kwargs):
    """
    CALLED BY QUERYREQUEST::LOAD_INDEXER
    IBEIS interface into neighbor_index_cache
Args:
qreq_ (QueryRequest): hyper-parameters
Returns:
NeighborIndexer: nnindexer
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache request_wbia_nnindexer
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> nnindexer, qreq_, ibs = testdata_nnindexer(None)
>>> nnindexer = request_wbia_nnindexer(qreq_)
"""
daid_list = qreq_.get_internal_daids()
if not hasattr(qreq_.qparams, 'use_augmented_indexer'):
qreq_.qparams.use_augmented_indexer = True
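    # NOTE: the `False and` guard below hard-disables the augmented path
    # (see the segfault warning on request_augmented_wbia_nnindexer), so
    # every request currently goes through the memcached code path.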
if False and qreq_.qparams.use_augmented_indexer:
nnindexer = request_augmented_wbia_nnindexer(qreq_, daid_list, **kwargs)
else:
nnindexer = request_memcached_wbia_nnindexer(qreq_, daid_list, **kwargs)
return nnindexer
def request_augmented_wbia_nnindexer(
    qreq_, daid_list, verbose=True, use_memcache=True, force_rebuild=False, memtrack=None
):
    r"""
    DO NOT USE. THIS FUNCTION CAN CURRENTLY CAUSE A SEGFAULT

    Tries to give you an indexer for the requested daids using the least
    amount of computation possible, by loading and adding to a partially
    built nnindex if possible; if that fails it falls back to
    request_memcached_wbia_nnindexer.
Args:
qreq_ (QueryRequest): query request object with hyper-parameters
daid_list (list):
    Returns:
        NeighborIndexer: nnindexer
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-request_augmented_wbia_nnindexer
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> # build test data
>>> ZEB_PLAIN = wbia.const.TEST_SPECIES.ZEB_PLAIN
>>> ibs = wbia.opendb('testdb1')
>>> use_memcache, max_covers, verbose = True, None, True
>>> daid_list = sorted(ibs.get_valid_aids(species=ZEB_PLAIN))[0:6]
>>> qreq_ = ibs.new_query_request(daid_list, daid_list)
>>> qreq_.qparams.min_reindex_thresh = 1
>>> min_reindex_thresh = qreq_.qparams.min_reindex_thresh
>>> # CLEAR CACHE for clean test
>>> clear_uuid_cache(qreq_)
>>> # LOAD 3 AIDS INTO CACHE
>>> aid_list = sorted(ibs.get_valid_aids(species=ZEB_PLAIN))[0:3]
>>> # Should fallback
>>> nnindexer = request_augmented_wbia_nnindexer(qreq_, aid_list)
>>> # assert the fallback
>>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
... qreq_, daid_list, min_reindex_thresh, max_covers)
>>> result2 = uncovered_aids, covered_aids_list
>>> ut.assert_eq(result2, ([4, 5, 6], [[1, 2, 3]]), 'pre augment')
>>> # Should augment
>>> nnindexer = request_augmented_wbia_nnindexer(qreq_, daid_list)
>>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
... qreq_, daid_list, min_reindex_thresh, max_covers)
>>> result3 = uncovered_aids, covered_aids_list
>>> ut.assert_eq(result3, ([], [[1, 2, 3, 4, 5, 6]]), 'post augment')
>>> # Should fallback
>>> nnindexer2 = request_augmented_wbia_nnindexer(qreq_, daid_list)
>>> assert nnindexer is nnindexer2
"""
global NEIGHBOR_CACHE
min_reindex_thresh = qreq_.qparams.min_reindex_thresh
if not force_rebuild:
new_daid_list, covered_aids_list = group_daids_by_cached_nnindexer(
qreq_, daid_list, min_reindex_thresh, max_covers=1
)
can_augment = len(covered_aids_list) > 0 and not ut.list_set_equal(
covered_aids_list[0], daid_list
)
else:
can_augment = False
if verbose:
logger.info('[aug] Requesting augmented nnindexer')
if can_augment:
covered_aids = covered_aids_list[0]
if verbose:
            logger.info(
                '[aug] Augmenting index of %d old daids with %d new daids'
                % (len(covered_aids), len(new_daid_list))
            )
# Load the base covered indexer
# THIS SHOULD LOAD NOT REBUILD IF THE UUIDS ARE COVERED
base_nnindexer = request_memcached_wbia_nnindexer(
qreq_, covered_aids, verbose=verbose, use_memcache=use_memcache
)
# Remove this indexer from the memcache because we are going to change it
if NEIGHBOR_CACHE.has_key( # NOQA (has_key is for a lru cache)
base_nnindexer.cfgstr
):
logger.info('Removing key from memcache')
NEIGHBOR_CACHE[base_nnindexer.cfgstr] = None
del NEIGHBOR_CACHE[base_nnindexer.cfgstr]
support_data = get_support_data(qreq_, new_daid_list)
(new_vecs_list, new_fgws_list, new_fxs_list) = support_data
base_nnindexer.add_support(
new_daid_list, new_vecs_list, new_fgws_list, new_fxs_list, verbose=True
)
# FIXME: pointer issues
nnindexer = base_nnindexer
# Change to the new cfgstr
nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
nnindexer.cfgstr = nnindex_cfgstr
cachedir = qreq_.ibs.get_flann_cachedir()
nnindexer.save(cachedir)
# Write to inverse uuid
if len(daid_list) > min_reindex_thresh:
uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
daids_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
UUID_MAP_CACHE.write_uuid_map_dict(
uuid_map_fpath, visual_uuid_list, daids_hashid
)
# Write to memcache
if ut.VERBOSE:
logger.info('[aug] Wrote to memcache=%r' % (nnindex_cfgstr,))
NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
return nnindexer
else:
# if ut.VERBOSE:
if verbose:
logger.info('[aug] Nothing to augment, fallback to memcache')
# Fallback
nnindexer = request_memcached_wbia_nnindexer(
qreq_,
daid_list,
verbose=verbose,
use_memcache=use_memcache,
force_rebuild=force_rebuild,
memtrack=memtrack,
)
return nnindexer
def request_memcached_wbia_nnindexer(
qreq_,
daid_list,
use_memcache=True,
verbose=ut.NOT_QUIET,
veryverbose=False,
force_rebuild=False,
memtrack=None,
prog_hook=None,
):
r"""
FOR INTERNAL USE ONLY
takes custom daid list. might not be the same as what is in qreq_
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-request_memcached_wbia_nnindexer
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> # build test data
    >>> ibs = wbia.opendb('testdb1')
    >>> ZEB_PLAIN = wbia.const.TEST_SPECIES.ZEB_PLAIN
    >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
    >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
    >>> qreq_.qparams.min_reindex_thresh = 3
>>> verbose = True
>>> use_memcache = True
>>> # execute function
>>> nnindexer = request_memcached_wbia_nnindexer(qreq_, daid_list, use_memcache)
>>> # verify results
>>> result = str(nnindexer)
>>> print(result)
"""
global NEIGHBOR_CACHE
# try:
if veryverbose:
logger.info(
'[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %r' % (len(NEIGHBOR_CACHE),)
)
        # the lru cache won't be recognized by get_object_size_str; cast to pure python objects
logger.info(
'[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s'
% (ut.get_object_size_str(NEIGHBOR_CACHE.items()),)
)
# if memtrack is not None:
# memtrack.report('IN REQUEST MEMCACHE')
nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
# neighbor memory cache
if (
not force_rebuild
and use_memcache
and NEIGHBOR_CACHE.has_key(nnindex_cfgstr) # NOQA (has_key is for a lru cache)
):
if veryverbose or ut.VERYVERBOSE or ut.VERBOSE:
logger.info('... nnindex memcache hit: cfgstr=%s' % (nnindex_cfgstr,))
nnindexer = NEIGHBOR_CACHE[nnindex_cfgstr]
else:
if veryverbose or ut.VERYVERBOSE or ut.VERBOSE:
logger.info('... nnindex memcache miss: cfgstr=%s' % (nnindex_cfgstr,))
# Write to inverse uuid
nnindexer = request_diskcached_wbia_nnindexer(
qreq_,
daid_list,
nnindex_cfgstr,
verbose,
force_rebuild=force_rebuild,
memtrack=memtrack,
prog_hook=prog_hook,
)
NEIGHBOR_CACHE_WRITE = True
if NEIGHBOR_CACHE_WRITE:
# Write to memcache
if ut.VERBOSE or ut.VERYVERBOSE:
logger.info('[disk] Write to memcache=%r' % (nnindex_cfgstr,))
NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
else:
if ut.VERBOSE or ut.VERYVERBOSE:
logger.info('[disk] Did not write to memcache=%r' % (nnindex_cfgstr,))
return nnindexer
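# Note on eviction: with the default MAX_NEIGHBOR_CACHE_SIZE of 1, requesting
# indexers for two different daid lists back to back evicts the first from
# NEIGHBOR_CACHE, so re-requesting it falls through to the disk cache. A
# hypothetical sequence (assuming daids_a != daids_b):
#
#   nn_a = request_memcached_wbia_nnindexer(qreq_, daids_a)  # miss -> disk
#   nn_b = request_memcached_wbia_nnindexer(qreq_, daids_b)  # miss, evicts nn_a
#   nn_a = request_memcached_wbia_nnindexer(qreq_, daids_a)  # miss -> disk again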
def request_diskcached_wbia_nnindexer(
qreq_,
daid_list,
nnindex_cfgstr=None,
verbose=True,
force_rebuild=False,
memtrack=None,
prog_hook=None,
):
r"""
builds new NeighborIndexer which will try to use a disk cached flann if
available
Args:
qreq_ (QueryRequest): query request object with hyper-parameters
daid_list (list):
nnindex_cfgstr (?):
verbose (bool):
Returns:
NeighborIndexer: nnindexer
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-request_diskcached_wbia_nnindexer
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb('testdb1')
>>> daid_list = ibs.get_valid_aids(species=wbia.const.TEST_SPECIES.ZEB_PLAIN)
>>> qreq_ = ibs.new_query_request(daid_list, daid_list)
>>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
>>> verbose = True
>>> # execute function
>>> nnindexer = request_diskcached_wbia_nnindexer(qreq_, daid_list, nnindex_cfgstr, verbose)
>>> # verify results
>>> result = str(nnindexer)
>>> print(result)
"""
if nnindex_cfgstr is None:
nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
cfgstr = nnindex_cfgstr
cachedir = qreq_.ibs.get_flann_cachedir()
flann_params = qreq_.qparams.flann_params
flann_params['checks'] = qreq_.qparams.checks
# if memtrack is not None:
# memtrack.report('[PRE SUPPORT]')
# Get annot descriptors to index
if prog_hook is not None:
prog_hook.set_progress(1, 3, 'Loading support data for indexer')
logger.info('[nnindex] Loading support data for indexer')
vecs_list, fgws_list, fxs_list = get_support_data(qreq_, daid_list)
if memtrack is not None:
memtrack.report('[AFTER GET SUPPORT DATA]')
try:
nnindexer = new_neighbor_index(
daid_list,
vecs_list,
fgws_list,
fxs_list,
flann_params,
cachedir,
cfgstr=cfgstr,
verbose=verbose,
force_rebuild=force_rebuild,
memtrack=memtrack,
prog_hook=prog_hook,
)
except Exception as ex:
ut.printex(
ex, True, msg_='cannot build inverted index', key_list=['ibs.get_infostr()']
)
raise
# Record these uuids in the disk based uuid map so they can be augmented if
# needed
min_reindex_thresh = qreq_.qparams.min_reindex_thresh
if len(daid_list) > min_reindex_thresh:
uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
daids_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list, daids_hashid)
if memtrack is not None:
memtrack.report('[AFTER WRITE_UUID_MAP]')
return nnindexer
def group_daids_by_cached_nnindexer(
qreq_, daid_list, min_reindex_thresh, max_covers=None
):
r"""
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-group_daids_by_cached_nnindexer
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> ibs = wbia.opendb('testdb1')
>>> ZEB_PLAIN = wbia.const.TEST_SPECIES.ZEB_PLAIN
>>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)
>>> qreq_ = ibs.new_query_request(daid_list, daid_list)
>>> # Set the params a bit lower
>>> max_covers = None
>>> qreq_.qparams.min_reindex_thresh = 1
>>> min_reindex_thresh = qreq_.qparams.min_reindex_thresh
>>> # STEP 0: CLEAR THE CACHE
>>> clear_uuid_cache(qreq_)
>>> # STEP 1: ASSERT EMPTY INDEX
>>> daid_list = sorted(ibs.get_valid_aids(species=ZEB_PLAIN))[0:3]
>>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
... qreq_, daid_list, min_reindex_thresh, max_covers)
>>> result1 = uncovered_aids, covered_aids_list
>>> ut.assert_eq(result1, ([], [[1, 2, 3]]), 'pre request')
>>> # TEST 2: SHOULD MAKE 123 COVERED
>>> nnindexer = request_memcached_wbia_nnindexer(qreq_, daid_list)
>>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
... qreq_, daid_list, min_reindex_thresh, max_covers)
>>> result2 = uncovered_aids, covered_aids_list
>>> ut.assert_eq(result2, ([], [[1, 2, 3]]), 'post request')
"""
ibs = qreq_.ibs
# read which annotations have prebuilt caches
uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
candidate_uuids = UUID_MAP_CACHE.read_uuid_map_dict(
uuid_map_fpath, min_reindex_thresh
)
# find a maximum independent set cover of the requested annotations
annot_vuuid_list = ibs.get_annot_visual_uuids(daid_list) # 3.2 %
covertup = ut.greedy_max_inden_setcover(
candidate_uuids, annot_vuuid_list, max_covers
) # 0.2 %
uncovered_vuuids, covered_vuuids_list, accepted_keys = covertup
# return the grouped covered items (so they can be loaded) and
# the remaining uuids which need to have an index computed.
#
uncovered_aids_ = ibs.get_annot_aids_from_visual_uuid(uncovered_vuuids) # 28.0%
covered_aids_list_ = ibs.unflat_map(
ibs.get_annot_aids_from_visual_uuid, covered_vuuids_list
) # 68%
# FIXME:
uncovered_aids = sorted(uncovered_aids_)
# covered_aids_list = list(map(sorted, covered_aids_list_))
covered_aids_list = covered_aids_list_
return uncovered_aids, covered_aids_list
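# Shape sketch of the cover computation above (values taken from the doctest
# in request_augmented_wbia_nnindexer, illustrative only): with a uuid map
# covering annots 1-3 and a request for annots 1-6, the result groups as
#   uncovered_aids = [4, 5, 6]
#   covered_aids_list = [[1, 2, 3]]
# so callers can load the covered group and only index the remainder.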
def get_data_cfgstr(ibs, daid_list):
"""part 2 data hash id"""
daids_hashid = ibs.get_annot_hashid_visual_uuid(daid_list)
return daids_hashid
def new_neighbor_index(
daid_list,
vecs_list,
fgws_list,
fxs_list,
flann_params,
cachedir,
cfgstr,
force_rebuild=False,
verbose=True,
memtrack=None,
prog_hook=None,
):
r"""
constructs neighbor index independent of wbia
Args:
daid_list (list):
vecs_list (list):
fgws_list (list):
flann_params (dict):
flann_cachedir (None):
nnindex_cfgstr (str):
use_memcache (bool):
Returns:
nnindexer
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-new_neighbor_index
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', a='default:species=zebra_plains', p='default:fgw_thresh=.999')
>>> daid_list = qreq_.daids
>>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
>>> ut.exec_funckw(new_neighbor_index, globals())
>>> cfgstr = nnindex_cfgstr
>>> cachedir = qreq_.ibs.get_flann_cachedir()
>>> flann_params = qreq_.qparams.flann_params
>>> # Get annot descriptors to index
>>> vecs_list, fgws_list, fxs_list = get_support_data(qreq_, daid_list)
>>> nnindexer = new_neighbor_index(daid_list, vecs_list, fgws_list, fxs_list, flann_params, cachedir, cfgstr, verbose=True)
>>> result = ('nnindexer.ax2_aid = %s' % (str(nnindexer.ax2_aid),))
>>> print(result)
nnindexer.ax2_aid = [1 2 3 4 5 6]
"""
nnindexer = NeighborIndex(flann_params, cfgstr)
    # if memtrack is not None:
    #     memtrack.report('CREATED NEIGHBOR INDEX')
    # Initialize the indexer with unindexed support data
nnindexer.init_support(daid_list, vecs_list, fgws_list, fxs_list, verbose=verbose)
if memtrack is not None:
memtrack.report('AFTER INIT SUPPORT')
# Load or build the indexing structure
nnindexer.ensure_indexer(
cachedir,
verbose=verbose,
force_rebuild=force_rebuild,
memtrack=memtrack,
prog_hook=prog_hook,
)
if memtrack is not None:
memtrack.report('AFTER LOAD OR BUILD')
return nnindexer
def testdata_nnindexer(dbname='testdb1', with_indexer=True, use_memcache=True):
r"""
Ignore:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> nnindexer, qreq_, ibs = testdata_nnindexer('PZ_Master1')
>>> S = np.cov(nnindexer.idx2_vec.T)
>>> import wbia.plottool as pt
>>> pt.ensureqt()
>>> pt.plt.imshow(S)
Example:
>>> # ENABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> nnindexer, qreq_, ibs = testdata_nnindexer()
"""
import wbia
daid_list = [7, 8, 9, 10, 11]
ibs = wbia.opendb(db=dbname)
    # use_memcache isn't used here because we aren't lazy loading the indexer
cfgdict = dict(fg_on=False)
qreq_ = ibs.new_query_request(
daid_list, daid_list, use_memcache=use_memcache, cfgdict=cfgdict
)
if with_indexer:
# we do an explicit creation of an indexer for these tests
nnindexer = request_wbia_nnindexer(qreq_, use_memcache=use_memcache)
else:
nnindexer = None
return nnindexer, qreq_, ibs
# ------------
# NEW
def check_background_process():
r"""
checks to see if the process has finished and then
writes the uuid map to disk
"""
global CURRENT_THREAD
if CURRENT_THREAD is None or CURRENT_THREAD.is_alive():
logger.info('[FG] background thread is not ready yet')
return False
# Get info set in background process
finishtup = CURRENT_THREAD.finishtup
(uuid_map_fpath, daids_hashid, visual_uuid_list, min_reindex_thresh) = finishtup
# Clean up background process
CURRENT_THREAD.join()
CURRENT_THREAD = None
# Write data to current uuidcache
if len(visual_uuid_list) > min_reindex_thresh:
UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list, daids_hashid)
return True
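# Hypothetical polling sketch (not part of the pipeline itself): a caller
# that started request_background_nnindexer could wait for the uuid map
# write with something like
#
#   import time
#   while not check_background_process():
#       time.sleep(1)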
def can_request_background_nnindexer():
return CURRENT_THREAD is None or not CURRENT_THREAD.is_alive()
def request_background_nnindexer(qreq_, daid_list):
    r"""FIXME: Duplicate code
Args:
qreq_ (QueryRequest): query request object with hyper-parameters
daid_list (list):
CommandLine:
python -m wbia.algo.hots.neighbor_index_cache --test-request_background_nnindexer
Example:
>>> # DISABLE_DOCTEST
>>> from wbia.algo.hots.neighbor_index_cache import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb('testdb1')
>>> daid_list = ibs.get_valid_aids(species=wbia.const.TEST_SPECIES.ZEB_PLAIN)
>>> qreq_ = ibs.new_query_request(daid_list, daid_list)
>>> # execute function
>>> request_background_nnindexer(qreq_, daid_list)
>>> # verify results
>>> result = str(False)
>>> print(result)
"""
global CURRENT_THREAD
logger.info('Requesting background reindex')
if not can_request_background_nnindexer():
# Make sure this function doesn't run if it is already running
logger.info('REQUEST DENIED')
return False
    logger.info('REQUEST ACCEPTED')
daids_hashid = qreq_.ibs.get_annot_hashid_visual_uuid(daid_list)
cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
cachedir = qreq_.ibs.get_flann_cachedir()
    # Save inverted cache uuid mappings
min_reindex_thresh = qreq_.qparams.min_reindex_thresh
# Grab the keypoints names and image ids before query time?
flann_params = qreq_.qparams.flann_params
# Get annot descriptors to index
vecs_list, fgws_list, fxs_list = get_support_data(qreq_, daid_list)
    # Don't hash rowids when given enough info in nnindex_cfgstr
    flann_params['cores'] = 2  # Only use a few cores in the background
# Build/Load the flann index
uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
# set temporary attribute for when the thread finishes
finishtup = (uuid_map_fpath, daids_hashid, visual_uuid_list, min_reindex_thresh)
    CURRENT_THREAD = ut.spawn_background_process(
        background_flann_func,
        cachedir,
        daid_list,
        vecs_list,
        fgws_list,
        fxs_list,
        flann_params,
        cfgstr,
        uuid_map_fpath,
        daids_hashid,
        visual_uuid_list,
        min_reindex_thresh,
    )
CURRENT_THREAD.finishtup = finishtup
def background_flann_func(
cachedir,
daid_list,
vecs_list,
fgws_list,
fxs_list,
flann_params,
cfgstr,
uuid_map_fpath,
daids_hashid,
visual_uuid_list,
min_reindex_thresh,
):
r"""FIXME: Duplicate code"""
logger.info('[BG] Starting Background FLANN')
    # FIXME: don't use flann cache
nnindexer = NeighborIndex(flann_params, cfgstr)
# Initialize neighbor with unindexed data
nnindexer.init_support(daid_list, vecs_list, fgws_list, fxs_list, verbose=True)
# Load or build the indexing structure
nnindexer.ensure_indexer(cachedir, verbose=True)
if len(visual_uuid_list) > min_reindex_thresh:
UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list, daids_hashid)
logger.info('[BG] Finished Background FLANN')