1 files changed, 42 insertions, 5 deletions
diff --git a/cli/app/commands/process/fetch.py b/cli/app/commands/process/fetch.py
index a558d94..5b6c102 100644
--- a/cli/app/commands/process/fetch.py
+++ b/cli/app/commands/process/fetch.py
@@ -3,6 +3,9 @@ import click
 from app.utils import click_utils
 from app.settings import app_cfg
 
+from os.path import join
+from subprocess import call
+
 @click.command('')
 # @click.option('-i', '--input', 'opt_dir_in', required=True, 
 #   help='Path to input image glob directory')
@@ -10,11 +13,45 @@ from app.settings import app_cfg
 @click.pass_context
 def cli(ctx):
   """
-  Converts directory of images to BigGAN* vectors
   """
 
-  # ------------------------------------------------
-  # imports
-
   # app_cfg.MODELZOO_CFG
-  pass
+  import gensim
+  # Disabled WordNet synonym lookup via nltk, kept for reference:
+  # from nltk.corpus import wordnet as wn
+  # synsets = wn.synsets("fir_tree")
+  # synonyms = [ lemma.name() for lemma in synsets[0].lemmas() ]
+
+  imagenet = Imagenet()
+  # Placeholder input: tokenize a fixed sample sentence; the tokens are not used yet
+  sentence = "The quick brown fox jumps over the lazy dog"
+  tokens = gensim.utils.simple_preprocess(sentence)
+
+class Imagenet:
+  """Word -> Wordnet ID table read from app_cfg.FP_IMAGENET_WORDS, plus an image downloader."""
+  def __init__(self):
+    """Fill self.tokens from lines expected as `<wordnet_id><TAB><word>, <word>, ...`."""
+    self.tokens = {}
+    with open(app_cfg.FP_IMAGENET_WORDS, "r") as fp:
+      for line in fp:
+        # partition() tolerates blank or tab-less lines, where split('\t') unpacking raised
+        wordnet_id, _, word_list = line.rstrip('\n').partition('\t')
+        for word in filter(None, map(str.strip, word_list.split(','))):
+          # strip() replaces the nonexistent str.trim(); empty entries are filtered out
+          self.tokens[word] = wordnet_id
+
+  def get_wordnet_ids_for_words(self, tokens):
+    """Stub: looking up `tokens` in self.tokens is not implemented yet; returns None."""
+    pass
+
+  @staticmethod
+  def images_from_wordnet_id(wordnet_id):
+    """Run the downloader script for the Wordnet class `wordnet_id`; return its exit code."""
+    return call([
+      "python",
+      join(app_cfg.DIR_APP, "../ImageNet-Datasets-Downloader/downloader.py"),
+      '-data_root', app_cfg.FP_IMAGENET,
+      '-use_class_list', 'True',
+      '-class_list', wordnet_id,
+      '-images_per_class', str(app_cfg.IMAGENET_IMAGES_PER_CLASS)  # argv entries must be str
+    ])