# -*- coding: utf-8 -*-
import logging
from wbia import dtool
import utool as ut
import vtool as vt
import numpy as np
from wbia.algo.smk import smk_funcs
from wbia.control.controller_inject import register_preprocs
# ``ut.inject2`` returns a 3-tuple that is unpacked here; note that this rebinds
# the module-level name ``print``. ``profile`` is used below as a decorator.
# NOTE(review): presumably these are utool's print/reload/profile hooks -- confirm.
(print, rrr, profile) = ut.inject2(__name__)
# Logger registered under the name 'wbia' (not this module's ``__name__``).
logger = logging.getLogger('wbia')
# Decorator taken from the 'annot' entry of ``register_preprocs``; it is applied
# to ``compute_residual_assignments`` further down in this module.
derived_attribute = register_preprocs['annot']
class InvertedIndexConfig(dtool.Config):
    """
    Parameter declarations used as the ``configclass`` of the
    ``inverted_agg_assign`` table.

    NOTE(review): the names ``'massign_equal,'`` and ``'massign_alpha,'`` contain
    a trailing comma *inside* the string. That looks accidental, but the names
    are runtime keys, so they are reproduced verbatim here -- confirm the impact
    on existing configs/caches before renaming them.
    """

    _param_info_list = [
        # Read by ``compute_residual_assignments`` via ``config['nAssign']``.
        ut.ParamInfo('nAssign', 1),
        # Read via ``config['int_rvec']``; selects int8 storage of residuals.
        ut.ParamInfo('int_rvec', True, hideif=False),
        ut.ParamInfo('massign_equal,', False),
        ut.ParamInfo('massign_alpha,', 1.2),
        ut.ParamInfo('inva_version', 2),
    ]
@ut.reloadable_class
class InvertedAnnots(InvertedAnnotsExtras):
    """
    Word assignments and aggregated residual vectors for a set of annotations.

    After :meth:`from_depc` the attributes ``wx_lists``, ``fxs_lists``,
    ``maws_lists``, ``agg_rvecs`` and ``agg_flags`` are lists with one entry
    per annotation in ``aids`` (``_assert_self`` checks the lengths agree).

    CommandLine:
        python -m wbia.algo.smk.inverted_index InvertedAnnots --show

    Ignore:
        >>> from wbia.algo.smk.inverted_index import *  # NOQA
        >>> import wbia
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='Oxford', a='oxford',
        >>>                             p='default:proot=smk,nAssign=1,num_words=64000')
        >>> config = qreq_.qparams
        >>> ibs = qreq_.ibs
        >>> depc = qreq_.ibs.depc
        >>> aids = qreq_.daids
        >>> aids = qreq_.qaids
        >>> input_tuple = (aids, [qreq_.daids])
        >>> inva = ut.DynStruct()
        >>> inva = InvertedAnnots(aids, qreq_)

    Example:
        >>> # DISABLE_DOCTEST
        >>> qreq_, inva = testdata_inva()
    """

    def __init__(inva):
        """Create an empty index; use :meth:`from_depc` to populate it."""
        inva.aids = None
        inva.wx_lists = None
        inva.fxs_lists = None
        # Previously missing: from_depc / _assert_self / SingleAnnot.from_inva
        # all rely on this attribute, so declare it with the others.
        inva.maws_lists = None
        inva.agg_rvecs = None
        inva.agg_flags = None
        inva.aid_to_idx = None
        inva.gamma_list = None
        inva.wx_to_weight = None
        inva.wx_to_aids = None
        inva.int_rvec = None
        inva.config = None
        inva.vocab_rowid = None

    @property
    def wx_list(inva):
        """Sorted list of the unique word indexes over all annotations."""
        wx = sorted(ut.flat_unique(*inva.wx_lists))
        return wx

    @classmethod
    def from_depc(cls, depc, aids, vocab_aids, config):
        """
        Load the inverted assignments of ``aids`` from the depcache.

        Args:
            depc: dependency cache providing the ``vocab`` and
                ``inverted_agg_assign`` tables.
            aids: annotation ids to load.
            vocab_aids: annotation ids that identify the vocabulary row.
            config: configuration used for the row lookups; must provide
                ``config['int_rvec']``.

        Returns:
            InvertedAnnots: populated instance. ``gamma_list``,
            ``wx_to_weight`` and ``wx_to_aids`` are left as ``None``.
        """
        inva = cls()
        vocab_rowid = depc.get_rowids('vocab', (vocab_aids,), config=config)[0]
        inva.vocab_rowid = vocab_rowid
        tablename = 'inverted_agg_assign'
        table = depc[tablename]
        # Every annotation is paired with the same vocabulary row.
        input_tuple = (aids, [vocab_rowid] * len(aids))
        tbl_rowids = depc.get_rowids(
            tablename, input_tuple, config=config, _hack_rootmost=True, _debug=False
        )
        logger.info('Reading data')
        inva.aids = aids
        inva.wx_lists = [
            np.array(wx_list_, dtype=np.int32)
            for wx_list_ in table.get_row_data(
                tbl_rowids, 'wx_list', showprog='load wxs'
            )
        ]
        inva.fxs_lists = [
            [np.array(fxs, dtype=np.uint16) for fxs in fxs_list]
            for fxs_list in table.get_row_data(
                tbl_rowids, 'fxs_list', showprog='load fxs'
            )
        ]
        inva.maws_lists = [
            [np.array(m, dtype=np.float32) for m in maws]
            for maws in table.get_row_data(
                tbl_rowids, 'maws_list', showprog='load maws'
            )
        ]
        inva.agg_rvecs = table.get_row_data(
            tbl_rowids, 'agg_rvecs', showprog='load agg_rvecs'
        )
        inva.agg_flags = table.get_row_data(
            tbl_rowids, 'agg_flags', showprog='load agg_flags'
        )
        inva.aid_to_idx = ut.make_index_lookup(inva.aids)
        inva.int_rvec = config['int_rvec']
        inva.gamma_list = None
        # The inverted list and word weights are computed separately.
        inva.wx_to_weight = None
        inva.wx_to_aids = None
        inva.config = config
        return inva

    def _assert_self(inva, qreq_):
        """Debug check: per-annotation lists line up and feature counts match."""
        ibs = qreq_.ibs
        assert len(inva.aids) == len(inva.wx_lists)
        assert len(inva.aids) == len(inva.fxs_lists)
        assert len(inva.aids) == len(inva.maws_lists)
        assert len(inva.aids) == len(inva.agg_rvecs)
        assert len(inva.aids) == len(inva.agg_flags)
        nfeat_list1 = ibs.get_annot_num_feats(inva.aids, config2_=qreq_.qparams)
        nfeat_list2 = [sum(ut.lmap(len, fx_list)) for fx_list in inva.fxs_lists]
        nfeat_list3 = [sum(ut.lmap(len, maws)) for maws in inva.maws_lists]
        ut.assert_lists_eq(nfeat_list1, nfeat_list2)
        ut.assert_lists_eq(nfeat_list1, nfeat_list3)

    def __getstate__(inva):
        """Pickle support: the state is the instance ``__dict__`` itself."""
        state = inva.__dict__
        return state

    def __setstate__(inva, state):
        """Pickle support: restore attributes from ``state``."""
        inva.__dict__.update(**state)

    # NOTE(review): ``ut.memoize`` on a method presumably caches on the
    # (inva, aid) arguments and may keep instances alive -- confirm.
    @profile
    @ut.memoize
    def get_annot(inva, aid):
        """Return the :class:`SingleAnnot` view for annotation ``aid``."""
        idx = inva.aid_to_idx[aid]
        X = SingleAnnot.from_inva(inva, idx)
        return X

    def compute_inverted_list(inva):
        """
        Build the word -> annotations mapping from ``aids`` and ``wx_lists``.

        The result is returned, not stored; callers assign it to
        ``inva.wx_to_aids`` (see ``testdata_inva``).
        """
        with ut.Timer('Building inverted list'):
            wx_to_aids = smk_funcs.invert_lists(inva.aids, inva.wx_lists)
            return wx_to_aids

    @profile
    def compute_word_weights(inva, method='idf'):
        """
        Compute a per-word weight like idf

        Requires ``inva.wx_to_aids`` to have been set.

        Args:
            method (str): one of ``'idf'``, ``'idf-maw'`` or ``'uniform'``.

        Returns:
            ut.DefaultValueDict: maps word index to weight, defaulting to 0
            for words that are not present.

        Raises:
            ValueError: if ``method`` is not one of the supported names.

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.smk.inverted_index import *  # NOQA
            >>> qreq_, inva = testdata_inva()
            >>> wx_to_weight = inva.compute_word_weights()
            >>> print('wx_to_weight = %r' % (wx_to_weight,))
        """
        wx_list = sorted(inva.wx_to_aids.keys())
        with ut.Timer('Computing %s weights' % (method,)):
            if method == 'idf':
                ndocs_total = len(inva.aids)
                # Unweighted documents: count distinct annotations per word
                ndocs_per_word = np.array(
                    [len(set(inva.wx_to_aids[wx])) for wx in wx_list]
                )
                weight_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
            elif method == 'idf-maw':
                # idf denom (the num of docs containing a word for each word)
                # The max(maws) denote the prob that this word indexes an annot
                ndocs_total = len(inva.aids)
                # Weighted documents
                wx_to_ndocs = {wx: 0.0 for wx in wx_list}
                for wx, maws in zip(
                    ut.iflatten(inva.wx_lists), ut.iflatten(inva.maws_lists)
                ):
                    wx_to_ndocs[wx] += min(1.0, max(maws))
                # Pass an ndarray, consistent with the 'idf' branch.
                ndocs_per_word = np.array(ut.take(wx_to_ndocs, wx_list))
                weight_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
            elif method == 'uniform':
                weight_per_word = np.ones(len(wx_list))
            else:
                # Previously fell through to an unbound ``weight_per_word``.
                raise ValueError('unknown word weight method %r' % (method,))
            wx_to_weight = dict(zip(wx_list, weight_per_word))
            wx_to_weight = ut.DefaultValueDict(0, wx_to_weight)
        return wx_to_weight

    @profile
    def compute_gammas(inva, alpha, thresh):
        """
        Compute one gamma value per annotation via ``smk_funcs.gamma_agg``.

        Requires ``inva.wx_to_weight`` to have been set. ``alpha`` and
        ``thresh`` are forwarded unchanged to ``smk_funcs.gamma_agg``.

        Returns:
            list: gamma values in the same order as ``inva.aids``.

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.smk.inverted_index import *  # NOQA
            >>> qreq_, inva = testdata_inva()
            >>> inva.wx_to_weight = inva.compute_word_weights('uniform')
            >>> alpha = 3.0
            >>> thresh = 0.0
            >>> gamma_list = inva.compute_gammas(alpha, thresh)
        """
        # TODO: sep
        wx_to_weight = inva.wx_to_weight
        _prog = ut.ProgPartial(
            length=len(inva.wx_lists), bs=True, lbl='gamma', adjust=True
        )
        _iter = zip(inva.wx_lists, inva.agg_rvecs, inva.agg_flags)
        gamma_list = []
        for wx_list, phiX_list, flagsX_list in _prog(_iter):
            if inva.int_rvec:
                # Stored residuals are integer-cast; undo that before use.
                phiX_list = smk_funcs.uncast_residual_integer(phiX_list)
            weight_list = np.array(ut.take(wx_to_weight, wx_list))
            gammaX = smk_funcs.gamma_agg(
                phiX_list, flagsX_list, weight_list, alpha, thresh
            )
            gamma_list.append(gammaX)
        return gamma_list
@ut.reloadable_class
class SingleAnnot(ut.NiceRepr):
    """
    Word assignments and aggregated residuals of one annotation.

    Instances are normally built with :meth:`from_inva`, which copies the
    ``idx``-th entry of each per-annotation list of an ``InvertedAnnots``.
    """

    def __init__(X):
        """Create an empty record; use :meth:`from_inva` to populate it."""
        X.aid = None
        X.wx_list = None
        X.fxs_list = None
        X.maws_list = None
        X.agg_rvecs = None
        X.agg_flags = None
        X.gamma = None
        X.wx_to_idx = None
        X.int_rvec = None
        X.wx_set = None

    def __nice__(X):
        """Short description used in the repr (the annotation id)."""
        # The hook was originally spelled ``__nice___`` (three trailing
        # underscores), so a ``__nice__`` lookup would never have found it.
        return '%s' % (X.aid,)

    # Backward-compatible alias for the original misspelled name.
    __nice___ = __nice__

    @classmethod
    def from_inva(cls, inva, idx):
        """
        Build the record for the annotation at position ``idx`` of ``inva``.

        ``gamma`` is only filled in when ``inva.gamma_list`` is available.
        """
        X = cls()
        X.aid = inva.aids[idx]
        X.wx_list = inva.wx_lists[idx]
        X.fxs_list = inva.fxs_lists[idx]
        X.maws_list = inva.maws_lists[idx]
        X.agg_rvecs = inva.agg_rvecs[idx]
        X.agg_flags = inva.agg_flags[idx]
        if inva.gamma_list is not None:
            X.gamma = inva.gamma_list[idx]
        X.wx_to_idx = ut.make_index_lookup(X.wx_list)
        X.int_rvec = inva.int_rvec
        X.wx_set = set(X.wx_list)
        return X

    def to_dense(X, inva=None, out=None):
        """
        Scatter the aggregated residuals into a dense buffer.

        Args:
            inva: index whose ``wx_list`` determines the number of words;
                required when ``out`` is not given.
            out (ndarray): optional preallocated buffer, either flat with
                ``n_words * n_dims`` entries or 2-D with ``n_dims`` columns.

        Returns:
            ndarray: ``out`` (a new flat float32 array when ``out`` was None),
            where the ``n_dims`` entries belonging to word ``wx`` hold the
            matching row of ``X.agg_rvecs``.
        """
        n_dims = X.agg_rvecs.shape[1]
        if out is None:
            assert inva is not None
            n_words = inva.wx_list[-1] + 1
            out = np.zeros((n_words * n_dims), dtype=np.float32)
        # The buffer is flat, but rows of agg_rvecs are n_dims wide; write
        # through a (n_words, n_dims) view. Indexing the flat buffer directly
        # with wx_list raised a shape-mismatch error.
        rows = out.reshape(-1, n_dims) if out.ndim == 1 else out
        # NOTE(review): values are written as stored (not uncast when
        # ``int_rvec`` is set) -- confirm that is intended.
        rows[X.wx_list] = X.agg_rvecs
        return out

    @property
    def words(X):
        """Set of word indexes assigned to this annotation."""
        return X.wx_set

    @profile
    def fxs(X, c):
        """Feature indexes assigned to word ``c``."""
        idx = X.wx_to_idx[c]
        fxs = X.fxs_list[idx]
        return fxs

    @profile
    def maws(X, c):
        """Assignment weights of the features assigned to word ``c``."""
        idx = X.wx_to_idx[c]
        maws = X.maws_list[idx]
        return maws

    def phis_flags_list(X, idxs):
        """
        get subset of non-aggregated residual vectors

        Raises:
            AttributeError: if ``rvecs_list`` / ``flags_list`` were never
                attached to this object (``from_inva`` does not set them).
        """
        rvecs_list = getattr(X, 'rvecs_list', None)
        flags_list = getattr(X, 'flags_list', None)
        if rvecs_list is None or flags_list is None:
            raise AttributeError(
                'SingleAnnot has no non-aggregated residuals: '
                'rvecs_list / flags_list are not set'
            )
        phis_list = ut.take(rvecs_list, idxs)
        flags_list = ut.take(flags_list, idxs)
        if X.int_rvec:
            phis_list = ut.lmap(smk_funcs.uncast_residual_integer, phis_list)
        return phis_list, flags_list

    def Phis_flags(X, idxs):
        """get subset of aggregated residual vectors"""
        Phis = X.agg_rvecs.take(idxs, axis=0)
        flags = X.agg_flags.take(idxs, axis=0)
        if X.int_rvec:
            Phis = smk_funcs.uncast_residual_integer(Phis)
        return Phis, flags

    def _assert_self(X, qreq_, vocab):
        """
        Debug check: recompute assignments and residuals from the raw
        descriptors and assert they equal the stored values.
        """
        all_fxs = sorted(ut.flatten(X.fxs_list))
        assert len(all_fxs) > all_fxs[-1]
        assert len(all_fxs) == qreq_.ibs.get_annot_num_feats(X.aid, qreq_.config)

        nAssign = qreq_.qparams['nAssign']
        int_rvec = qreq_.qparams['int_rvec']

        annots = qreq_.ibs.annots([X.aid], config=qreq_.config)
        vecs = annots.vecs[0]
        argtup = residual_args(vocab, vecs, nAssign, int_rvec)
        wx_list, word_list, fxs_list, maws_list, fx_to_vecs, int_rvec = argtup
        assert np.all(X.wx_list == wx_list)
        assert np.all([all(a == b) for a, b in zip(X.fxs_list, fxs_list)])
        assert np.all([all(a == b) for a, b in zip(X.maws_list, maws_list)])
        tup = residual_worker(argtup)
        (wx_list, fxs_list, maws_list, agg_rvecs, agg_flags) = tup
        assert np.all(X.agg_rvecs == agg_rvecs)
        assert np.all(X.agg_flags == agg_flags)
        # The recomputed arrays must be fresh objects, not the stored ones.
        assert X.agg_rvecs is not agg_rvecs
        assert X.agg_flags is not agg_flags

    def nbytes_info(X):
        """Map each attribute name to ``ut.get_object_nbytes`` of its value."""
        size_info = ut.map_vals(ut.get_object_nbytes, X.__dict__)
        return size_info

    def nbytes(X):
        """Sum of the per-attribute sizes reported by :meth:`nbytes_info`."""
        size_info = X.nbytes_info()
        nbytes = sum(size_info.values())
        return nbytes
@derived_attribute(
    tablename='inverted_agg_assign',
    parents=['feat', 'vocab'],
    colnames=[
        'wx_list',
        'fxs_list',
        'maws_list',
        # 'rvecs_list',
        # 'flags_list',
        'agg_rvecs',
        'agg_flags',
    ],
    coltypes=[
        list,
        list,
        list,
        # list, list,
        np.ndarray,
        np.ndarray,
    ],
    configclass=InvertedIndexConfig,
    fname='smk/smk_agg_rvecs',
    chunksize=256,
)
def compute_residual_assignments(depc, fid_list, vocab_id_list, config):
    r"""
    Assign features to vocabulary words and yield aggregated residuals.

    All entries of ``vocab_id_list`` must be identical. The vocabulary is
    read from ``_hack_chunk_cache`` of the ``inverted_agg_assign`` table when
    that cache exists and already holds it; otherwise it is loaded from the
    ``vocab`` table and stored in the cache (if there is one). Per-feature
    arguments are built in this process and then handed to
    ``residual_worker`` in a process pool.

    Args:
        depc: dependency cache giving access to the tables.
        fid_list: rowids of the ``feat`` table.
        vocab_id_list: rowids of the ``vocab`` table (all the same).
        config: must provide ``config['nAssign']`` and ``config['int_rvec']``.

    Yields:
        tuple: the result of ``residual_worker`` for each entry of
        ``fid_list``, in order.

    CommandLine:
        python -m wbia.control.IBEISControl show_depc_annot_table_input \
            --show --tablename=residuals

    Ignore:
        ibs.depc['vocab'].print_table()

    Ignore:
        data = ibs.depc.get('inverted_agg_assign', ([1, 2473], qreq_.daids), config=qreq_.config)
        wxs1 = data[0][0]
        wxs2 = data[1][0]

        # Lev Example
        import wbia
        ibs = wbia.opendb('Oxford')
        depc = ibs.depc
        table = depc['inverted_agg_assign']
        table.print_table()
        table.print_internal_info()

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.inverted_index import *  # NOQA
        >>> # Test depcache access
        >>> import wbia
        >>> ibs, aid_list = wbia.testdata_aids('testdb1')
        >>> depc = ibs.depc_annot
        >>> config = {'num_words': 1000, 'nAssign': 1}
        >>> #input_tuple = (aid_list, [aid_list] * len(aid_list))
        >>> daids = aid_list
        >>> input_tuple = (daids, [daids])
        >>> rowid_kw = {}
        >>> tablename = 'inverted_agg_assign'
        >>> target_tablename = tablename
        >>> input_ids = depc.get_parent_rowids(tablename, input_tuple, config)
        >>> fid_list = ut.take_column(input_ids, 0)
        >>> vocab_id_list = ut.take_column(input_ids, 1)
        >>> data = depc.get(tablename, input_tuple, config)
        >>> tup = data[1]

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.inverted_index import *  # NOQA
        >>> import wbia
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='Oxford', a='oxford', p='default:proot=smk,nAssign=1,num_words=64000')
        >>> config = {'num_words': 64000, 'nAssign': 1, 'int_rvec': True}
        >>> depc = qreq_.ibs.depc
        >>> daids = qreq_.daids
        >>> input_tuple = (daids, [daids])
        >>> rowid_kw = {}
        >>> tablename = 'inverted_agg_assign'
        >>> target_tablename = tablename
        >>> input_ids = depc.get_parent_rowids(tablename, input_tuple, config)
        >>> fid_list = ut.take_column(input_ids, 0)
        >>> vocab_id_list = ut.take_column(input_ids, 1)
    """
    from concurrent import futures

    assert ut.allsame(vocab_id_list)
    vocabid = vocab_id_list[0]

    # NEED HACK TO NOT LOAD INDEXER EVERY TIME
    this_table = depc['inverted_agg_assign']
    vocab_table = depc['vocab']
    is_cached = (
        this_table._hack_chunk_cache is not None
        and vocabid in this_table._hack_chunk_cache
    )
    if is_cached:
        vocab = this_table._hack_chunk_cache[vocabid]
    else:
        vocab = vocab_table.get_row_data([vocabid], 'words')[0]
        if this_table._hack_chunk_cache is not None:
            this_table._hack_chunk_cache[vocabid] = vocab

    logger.info('Grab Vecs')
    vecs_list = depc.get_native('feat', fid_list, 'vecs')
    nAssign = config['nAssign']
    int_rvec = config['int_rvec']

    logger.info('Building residual args')
    lazy_args = gen_residual_args(vocab, vecs_list, nAssign, int_rvec)
    # Materialise every argument tuple before any worker is submitted.
    args_list = list(
        ut.ProgIter(lazy_args, length=len(vecs_list), lbl='building args')
    )

    nprocs = ut.num_cpus()
    logger.info('Creating %d processes' % (nprocs,))
    # Leaving the ``with`` block shuts the pool down and waits for workers,
    # whether the generator finishes, fails, or is closed early.
    with futures.ProcessPoolExecutor(nprocs) as executor:
        logger.info('Submiting workers')
        pending = []
        for args in ut.ProgIter(args_list, lbl='submit proc'):
            pending.append(executor.submit(residual_worker, args))
        # Results are yielded in submission order.
        for future in ut.ProgIter(pending, lbl='getting phi result'):
            yield future.result()
def gen_residual_args(vocab, vecs_list, nAssign, int_rvec):
    """
    Lazily yield one ``residual_args`` tuple per entry of ``vecs_list``.

    ``vocab``, ``nAssign`` and ``int_rvec`` are forwarded unchanged for every
    entry.
    """
    for fx_to_vecs in vecs_list:
        yield residual_args(vocab, fx_to_vecs, nAssign, int_rvec)
def residual_args(vocab, vecs, nAssign, int_rvec):
    """
    Build the argument tuple consumed by ``residual_worker``.

    The descriptors are assigned to words with ``smk_funcs.assign_to_words``,
    the assignment is inverted with ``smk_funcs.invert_assigns``, and the
    per-word entries are gathered in ascending word-index order.

    Returns:
        tuple: ``(wx_list, word_list, fxs_list, maws_list, fx_to_vecs,
        int_rvec)`` where ``fx_to_vecs`` is the ``vecs`` argument itself.
    """
    feat_to_wxs, feat_to_maws = smk_funcs.assign_to_words(vocab, vecs, nAssign)
    word_to_fxs, word_to_maws = smk_funcs.invert_assigns(feat_to_wxs, feat_to_maws)
    sorted_wxs = sorted(word_to_fxs.keys())
    return (
        sorted_wxs,
        ut.take(vocab.wx_to_word, sorted_wxs),
        ut.take(word_to_fxs, sorted_wxs),
        ut.take(word_to_maws, sorted_wxs),
        vecs,
        int_rvec,
    )
def residual_worker(argtup):
    """
    Compute the aggregated residual vector and flag for every assigned word.

    Args:
        argtup (tuple): ``(wx_list, word_list, fxs_list, maws_list,
            fx_to_vecs, int_rvec)`` as built by ``residual_args``. The first
            four entries are parallel, one item per word; ``fx_to_vecs`` is a
            2-D array indexed by feature.

    Returns:
        tuple: ``(wx_list, fxs_list, maws_list, agg_rvecs, agg_flags)``.
        ``agg_rvecs`` has shape ``(len(wx_list), fx_to_vecs.shape[1])`` and
        dtype int8 when ``int_rvec`` is true, float64 otherwise;
        ``agg_flags`` is a boolean array of shape ``(len(wx_list), 1)``.
    """
    wx_list, word_list, fxs_list, maws_list, fx_to_vecs, int_rvec = argtup
    n_words = len(wx_list)
    n_dims = fx_to_vecs.shape[1]
    # ``np.float`` / ``np.bool`` were aliases of the builtins and were removed
    # in NumPy 1.24; use the explicit scalar types instead.
    rvec_dtype = np.int8 if int_rvec else np.float64
    agg_rvecs = np.empty((n_words, n_dims), dtype=rvec_dtype)
    agg_flags = np.empty((n_words, 1), dtype=np.bool_)
    for idx, (word, fxs, maws) in enumerate(zip(word_list, fxs_list, maws_list)):
        # Residuals of the features assigned to this word
        vecs = fx_to_vecs.take(fxs, axis=0)
        _rvecs, _flags = smk_funcs.compute_rvec(vecs, word)
        _agg_rvec, _agg_flag = smk_funcs.aggregate_rvecs(_rvecs, maws, _flags)
        # Cast to integers for storage
        if int_rvec:
            _agg_rvec = smk_funcs.cast_residual_integer(_agg_rvec)
        agg_rvecs[idx] = _agg_rvec
        agg_flags[idx] = _agg_flag
    tup = (wx_list, fxs_list, maws_list, agg_rvecs, agg_flags)
    return tup
def testdata_inva():
    """
    Build a small query request and its inverted index for the doctests.

    The database annotations of the request serve both as the indexed
    annotations and as the vocabulary annotations. The inverted list is
    computed and attached as ``inva.wx_to_aids`` before returning.

    Returns:
        tuple: ``(qreq_, inva)``

    Ignore:
        from wbia.algo.smk.inverted_index import *  # NOQA
    """
    import wbia

    qreq_ = wbia.testdata_qreq_(
        defaultdb='PZ_MTEST', a='default', p='default:proot=smk,nAssign=1,num_words=64'
    )
    inva = InvertedAnnots.from_depc(
        qreq_.ibs.depc, qreq_.daids, qreq_.daids, qreq_.qparams
    )
    inva.wx_to_aids = inva.compute_inverted_list()
    return qreq_, inva