""" Index all of the FAISS datasets """ import os import glob import faiss import time import numpy as np from app.utils.file_utils import load_recipe, load_csv_safe from app.settings import app_cfg as cfg class DefaultRecipe: def __init__(self): self.dim = 128 self.factory_type = 'Flat' def build_all_faiss_databases(): datasets = [] for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): name = os.path.basename(fn) recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json") if os.path.exists(recipe_fn): build_faiss_database(name, load_recipe(recipe_fn)) else: build_faiss_database(name, DefaultRecipe()) def build_faiss_database(name, recipe): vec_fn = os.path.join(cfg.DIR_FAISS_METADATA, name, "vecs.csv") index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index") index = faiss.index_factory(recipe.dim, recipe.factory_type) keys, rows = load_csv_safe(vec_fn) feats = np.array([ list(map(float, row[3].split(","))) for row in rows ]).astype('float32') n, d = feats.shape print("{}: training {} x {} dim vectors".format(name, n, d)) print(recipe.factory_type) add_start = time.time() index.add(feats) add_end = time.time() add_time = add_end - add_start print("{}: add time: {:.1f}s".format(name, add_time)) faiss.write_index(index, index_fn) def load_faiss_databases(): faiss_datasets = {} for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): name = os.path.basename(fn) index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index") if os.path.exists(index_fn): index = faiss.read_index(index_fn) faiss_datasets[name] = index return faiss_datasets