From aebb91b47cad8aa70403eb6dec9dbe49ef6267fb Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 7 Apr 2020 20:43:20 +0200 Subject: min depth --- cli/commands/bridge/words.py | 57 ++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/cli/commands/bridge/words.py b/cli/commands/bridge/words.py index bcfd2c4..0bbf3a5 100644 --- a/cli/commands/bridge/words.py +++ b/cli/commands/bridge/words.py @@ -24,13 +24,17 @@ from app.thesaurus.api import Thesaurus help='Number of words to check per step') @click.option('-c', '--categories_per_word', 'opt_categories_per_word', default=3, help='Number of categories to check per word') +@click.option('-d', '--min_depth', 'opt_min_depth', default=10, + help='Minimum depth of matches') +@click.option('-sh', '--use_shortest_path', 'opt_use_shortest_path', is_flag=True, + help='Use shortest path between words') @click.pass_context -def cli(ctx, opt_word_a, opt_word_b, opt_include_oe, opt_include_slang, opt_words_per_step, opt_categories_per_word): +def cli(ctx, opt_word_a, opt_word_b, opt_include_oe, opt_include_slang, opt_words_per_step, opt_categories_per_word, opt_min_depth, opt_use_shortest_path): """ Find connections between two words """ thesaurus = Thesaurus() - solver = TreeSolver(thesaurus, opt_word_a, opt_word_b, opt_include_oe, opt_include_slang, opt_words_per_step, opt_categories_per_word) + solver = TreeSolver(thesaurus, opt_word_a, opt_word_b, opt_include_oe, opt_include_slang, opt_words_per_step, opt_categories_per_word, opt_min_depth, opt_use_shortest_path) print(f"Starting word: {opt_word_a}") print(f"Ending word: {opt_word_b}") @@ -52,7 +56,7 @@ def cli(ctx, opt_word_a, opt_word_b, opt_include_oe, opt_include_slang, opt_word # print(solver.skips) class TreeSolver: - def __init__(self, thesaurus, word_a, word_b, include_oe, include_slang, words_per_step, categories_per_word): + def __init__(self, thesaurus, word_a, word_b, include_oe, include_slang, words_per_step, categories_per_word, min_depth, use_shortest_path): self.thesaurus = thesaurus self.word_a = word_a self.word_b = word_b @@ -61,6 +65,8 @@ class TreeSolver: self.words_per_step = words_per_step self.categories_per_word = categories_per_word self.skips = [] + self.min_depth = min_depth + self.use_shortest_path = use_shortest_path self.max_dist = 0 self.reset() @@ -76,6 +82,7 @@ class TreeSolver: words = words[:self.words_per_step] for word in tqdm(words): categories = self.thesaurus.search(word)['categories'] + random.shuffle(categories) count = 0 for category in categories: if count > self.categories_per_word: @@ -93,6 +100,7 @@ class TreeSolver: if len(add_to_queue): next_queue += add_to_queue count += 1 + random.shuffle(next_queue) return next_queue def process_category(self, catid, tree, target): @@ -118,17 +126,22 @@ class TreeSolver: if word not in tree: tree[word] = tree[catid] + 1 self.max_dist = max(self.max_dist, tree[word]) - if word in target: + if word in target and self.is_deep_enough(word): self.make_chain(hinge=word, can_remove=True) return word - if word in target: + if word in target and self.is_deep_enough(word): self.make_chain(hinge=word, can_remove=True) return None + def is_deep_enough(self, word): + return (self.tree_a[word] + self.tree_b[word]) >= self.min_depth + def make_chain(self, hinge, can_remove=True): # tqdm.write(f"Making chain from {hinge}") chain_a = self.descend_chain(hinge, self.tree_a) chain_b = self.descend_chain(hinge, self.tree_b) + if chain_a is None or chain_b is None: + return False chain = list(reversed(chain_a)) + [hinge] + chain_b self.display_chain(chain) if can_remove: @@ -163,18 +176,20 @@ class TreeSolver: while word is not None: match = None if self.is_integer(word): - category_result = self.thesaurus.category(word) - for category_word in category_result['words']: + category_words = self.thesaurus.category(word)['words'] + random.shuffle(category_words) + for category_word in category_words: cat_word = self.fix_word(category_word['word']) - if cat_word != word and cat_word in tree and tree[cat_word] < tree[word]: + if cat_word != word and cat_word in tree and self.can_descend(tree, cat_word, word): chain.append(cat_word) match = cat_word break else: categories = self.thesaurus.search(word)['categories'] + random.shuffle(categories) for category in categories: catid = category['catid'] - if catid != word and catid in tree and tree[catid] < tree[word]: + if catid != word and catid in tree and self.can_descend(tree, catid, word): chain.append(catid) match = catid break @@ -183,17 +198,23 @@ class TreeSolver: if tree[word] == 0: break else: - if self.is_integer(word): - tqdm.write(f"No match for: {self.get_category_name(word)}") - tqdm.write(f"Chain started with {start_word}") - self.display_chain(chain) - else: - tqdm.write(f"No match for: {word}") - tqdm.write(f"Chain started with {start_word}") - self.display_chain(chain) - return [] + # if self.is_integer(word): + # tqdm.write(f"No match for: {self.get_category_name(word)}") + # tqdm.write(f"Chain started with {start_word}") + # self.display_chain(chain) + # else: + # tqdm.write(f"No match for: {word}") + # tqdm.write(f"Chain started with {start_word}") + # self.display_chain(chain) + return None return chain + def can_descend(self, tree, word_x, word_y): + if self.use_shortest_path: + return tree[word_x] < tree[word_y] + else: + return tree[word_x] == tree[word_y] - 1 + def display_chain(self, chain): tqdm.write("") for i, word in enumerate(chain): -- cgit v1.2.3-70-g09d2