diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2020-05-13 14:57:03 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2020-05-13 14:57:03 +0200 |
| commit | 3893c9d39b4b8a172069684c707491477b395e83 (patch) | |
| tree | f27d6585a4bd6aa41e5bd44a3b874eee18df9186 /cli/commands/bridge/finnegan.py | |
| parent | 6145dd0293ac2ba35f49b2a0f5d5c4b724f79302 (diff) | |
Diffstat (limited to 'cli/commands/bridge/finnegan.py')
| -rw-r--r-- | cli/commands/bridge/finnegan.py | 24 |
1 files changed, 17 insertions, 7 deletions
diff --git a/cli/commands/bridge/finnegan.py b/cli/commands/bridge/finnegan.py index 25482e4..f3f0e3a 100644 --- a/cli/commands/bridge/finnegan.py +++ b/cli/commands/bridge/finnegan.py @@ -16,6 +16,8 @@ from app.utils.word_utils import is_oe, is_slang, is_scots, is_compound_word, fi help='Minimum length') @click.option('-c', '--categories', 'opt_category_count', required=True, type=int, default=2, help='Categories to use for synonyms per word-fragment, or specify 0 to use all categories') +@click.option('-r', '--reuse_word', 'opt_reuse_word', required=True, is_flag=True, + help='Whether to reuse pieces of the original word') @click.option('-oe', '--include_oe', 'opt_include_oe', is_flag=True, help='Whether to include OE/archaic words') @click.option('-sl', '--include_slang', 'opt_include_slang', is_flag=True, @@ -25,15 +27,15 @@ from app.utils.word_utils import is_oe, is_slang, is_scots, is_compound_word, fi @click.option('-comp', '--include_compound', 'opt_include_compound_words', is_flag=True, help='Whether to include compound words (words that contain a space or hyphen)') @click.pass_context -def cli(ctx, opt_word, opt_min_len, opt_splits, opt_category_count, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words): +def cli(ctx, opt_word, opt_min_len, opt_splits, opt_category_count, opt_reuse_word, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words): """Split a word into pieces and substitute the pieces with entries from the thesaurus""" - finneganizer = Finneganizer(opt_min_len, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words, opt_category_count) + finneganizer = Finneganizer(opt_min_len, opt_include_oe, opt_include_slang, opt_include_scots, opt_include_compound_words, opt_category_count, opt_reuse_word) new_words = finneganizer.process_pieces(opt_word, opt_splits - 1) for word in new_words: print(word) class Finneganizer: - def __init__(self, opt_min_len, include_oe, include_slang, include_scots, include_compound_words, category_count): + def __init__(self, opt_min_len, include_oe, include_slang, include_scots, include_compound_words, category_count, reuse_word): self.thesaurus = Thesaurus() self.opt_min_len = opt_min_len self.include_oe = include_oe @@ -41,6 +43,7 @@ class Finneganizer: self.include_scots = include_scots self.include_compound_words = include_compound_words self.category_count = category_count + self.reuse_word = reuse_word def process_pieces(self, word, opt_splits): """Recursively find synonyms for the pieces of a word""" @@ -49,32 +52,39 @@ class Finneganizer: index = self.opt_min_len end_len = len(word) - self.opt_min_len - words = [word] + words = [] + if self.reuse_word: + words.append(word) while index < end_len: word_a = word[:index] word_b = word[index:] synonyms_a = self.get_synonyms(word_a) synonyms_b = self.process_pieces(word_b, opt_splits - 1) + index += 1 + if len(synonyms_a) == 0 or len(synonyms_b) == 0: + continue for synonym_a, synonym_b in itertools.product(synonyms_a, synonyms_b): words.append(synonym_a + synonym_b) - index += 1 return words def get_synonyms(self, word_a): """Get synonyms from a random category""" + results = [] + if self.reuse_word: + results.append(word_a) categories = self.thesaurus.search(word_a)['categories'] if not categories or len(categories) == 0: - return [word_a] + return results random.shuffle(categories) if self.category_count > 0: categories = categories[:self.category_count] - results = [ word_a ] for category in categories: catid = category['catid'] category_result = self.thesaurus.category(catid) for category_word in category_result['words']: word = fix_word(category_word['word']) + word = word.lower() years = category_word['years'].lower() if not self.include_oe and is_oe(years): continue |
