skip original word parts (HEAD, master)

author: Jules Laplace <julescarbon@gmail.com> 2020-05-13 14:57:03 +0200
committer: Jules Laplace <julescarbon@gmail.com> 2020-05-13 14:57:03 +0200
commit: 3893c9d39b4b8a172069684c707491477b395e83 (patch)
tree: f27d6585a4bd6aa41e5bd44a3b874eee18df9186 /cli/commands/bridge/finnegan.py
parent: 6145dd0293ac2ba35f49b2a0f5d5c4b724f79302 (diff)
1 files changed, 17 insertions, 7 deletions
diff --git a/cli/commands/bridge/finnegan.py b/cli/commands/bridge/finnegan.py
index 25482e4..f3f0e3a 100644
--- a/cli/commands/bridge/finnegan.py
+++ b/cli/commands/bridge/finnegan.py
@@ -16,6 +16,8 @@ from app.utils.word_utils import is_oe, is_slang, is_scots, is_compound_word, fi
   help='Minimum length')
 @click.option('-c', '--categories', 'opt_category_count', required=True, type=int, default=2,
   help='Categories to use for synonyms per word-fragment, or specify 0 to use all categories')
+@click.option('-r', '--reuse_word', 'opt_reuse_word', required=True, is_flag=True,
+  help='Whether to reuse pieces of the original word')
 @click.option('-oe', '--include_oe', 'opt_include_oe', is_flag=True,
   help='Whether to include OE/archaic words')
 @click.option('-sl', '--include_slang', 'opt_include_slang', is_flag=True,
@@ -25,15 +27,15 @@ from app.utils.word_utils import is_oe, is_slang, is_scots, is_compound_word, fi
 @click.option('-comp', '--include_compound', 'opt_include_compound_words', is_flag=True,
   help='Whether to include compound words (words that contain a space or hyphen)')
 @click.pass_context
-def cli(ctx, opt_word, opt_min_len, opt_splits, opt_category_count, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words):
+def cli(ctx, opt_word, opt_min_len, opt_splits, opt_category_count, opt_reuse_word, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words):
   """Split a word into pieces and substitute the pieces with entries from the thesaurus"""
-  finneganizer = Finneganizer(opt_min_len, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words, opt_category_count)
+  finneganizer = Finneganizer(opt_min_len, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words, opt_category_count, opt_reuse_word)
   new_words = finneganizer.process_pieces(opt_word, opt_splits - 1)
   for word in new_words:
     print(word)
 
 class Finneganizer:
-  def __init__(self, opt_min_len, include_oe, include_slang, include_scots, include_compound_words, category_count):
+  def __init__(self, opt_min_len, include_oe, include_slang, include_scots, include_compound_words, category_count, reuse_word):
     self.thesaurus = Thesaurus()
     self.opt_min_len = opt_min_len
     self.include_oe = include_oe
@@ -41,6 +43,7 @@ class Finneganizer:
     self.include_scots = include_scots
     self.include_compound_words = include_compound_words
     self.category_count = category_count
+    self.reuse_word = reuse_word
 
   def process_pieces(self, word, opt_splits):
     """Recursively find synonyms for the pieces of a word"""
@@ -49,32 +52,39 @@ class Finneganizer:
 
     index = self.opt_min_len
     end_len = len(word) - self.opt_min_len
-    words = [word]
+    words = []
+    if self.reuse_word:
+      words.append(word)
 
     while index < end_len:
       word_a = word[:index]
       word_b = word[index:]
       synonyms_a = self.get_synonyms(word_a)
       synonyms_b = self.process_pieces(word_b, opt_splits - 1)
+      index += 1
+      if len(synonyms_a) == 0 or len(synonyms_b) == 0:
+        continue
       for synonym_a, synonym_b in itertools.product(synonyms_a, synonyms_b):
         words.append(synonym_a + synonym_b)
-      index += 1
     return words
 
   def get_synonyms(self, word_a):
     """Get synonyms from a random category"""
+    results = []
+    if self.reuse_word:
+      results.append(word_a)
     categories = self.thesaurus.search(word_a)['categories']
     if not categories or len(categories) == 0:
-      return [word_a]
+      return results
     random.shuffle(categories)
     if self.category_count > 0:
       categories = categories[:self.category_count]
-    results = [ word_a ]
     for category in categories:
       catid = category['catid']
       category_result = self.thesaurus.category(catid)
       for category_word in category_result['words']:
         word = fix_word(category_word['word'])
+        word = word.lower()
         years = category_word['years'].lower()
         if not self.include_oe and is_oe(years):
           continue
author	Jules Laplace <julescarbon@gmail.com>	2020-05-13 14:57:03 +0200
committer	Jules Laplace <julescarbon@gmail.com>	2020-05-13 14:57:03 +0200
commit	3893c9d39b4b8a172069684c707491477b395e83 (patch)
tree	f27d6585a4bd6aa41e5bd44a3b874eee18df9186 /cli/commands/bridge/finnegan.py
parent	6145dd0293ac2ba35f49b2a0f5d5c4b724f79302 (diff)