diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2020-04-02 15:31:46 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2020-04-02 15:31:46 +0200 |
| commit | c14810fe9f663d46b5477088d06047fea66d1524 (patch) | |
| tree | 72eebe1589b71147f003ebb55c66a20eaf5bce6a /cli/app/thesaurus/api.py | |
| parent | 0b55e297d5088962fe8397903041c2b1737c7cdd (diff) | |
ability to skip words if you don't like the connection
Diffstat (limited to 'cli/app/thesaurus/api.py')
| -rw-r--r-- | cli/app/thesaurus/api.py | 39 |
1 file changed, 33 insertions, 6 deletions
diff --git a/cli/app/thesaurus/api.py b/cli/app/thesaurus/api.py index 98e0210..467d5fb 100644 --- a/cli/app/thesaurus/api.py +++ b/cli/app/thesaurus/api.py @@ -1,4 +1,6 @@ import os +import re +import time import requests from hashlib import sha256 @@ -13,10 +15,18 @@ class Thesaurus: sha = sha256(word) hash_path = os.path.join(base_path, sha[0:2]) os.makedirs(hash_path, exist_ok=True) - path = os.path.join(hash_path, word + '.json') + clean_word = re.sub('[^0-9a-zA-Z]+', '*', word) + path = os.path.join(hash_path, clean_word + '.json') if os.path.exists(path): return read_json(path) - data = api_fn(word) + data = None + while data is None: + try: + data = api_fn(word) + except Exception as e: + print("Got HTTP error, sleeping for 5 seconds") + time.sleep(5) + pass write_json(path, data) return data @@ -34,6 +44,11 @@ class ThesaurusAPI: } def search(self, word): + word = word.split('<')[0] + word = word.split('/')[0] + word = word.replace('(', '').replace(')', '') + if len(word) < 1: + return { 'word': word, 'categories': [] } query = { 'qsearch': word, } @@ -42,8 +57,18 @@ class ThesaurusAPI: return [] data = resp.text data = data.split('<div id="resultsTimelineData">')[1].split('</div>')[0] - # print(data) - rows = json.loads(data) + data = data.replace('<span class="oesc">', '') + data = data.replace('</span>', '') + try: + rows = json.loads(data) + except Exception as e: + print(f"Error loading JSON for {word}") + print(data) + # raise e + return { + 'word': word, + 'categories': [], + } cats = [] for row in rows: cat, years = row['popup'].split(']: ') @@ -72,8 +97,10 @@ class ThesaurusAPI: raw_words = raw.split('"><b>')[1:] words = [] for word in raw_words: - word, rest = word.split('</b>') - years = word.split(' <span')[0].strip() + word_partz = word.split('</b>') + word = word_partz[0] + years = word_partz[1].split(' <span')[0].strip() + years = years.replace('\u2013', '-') words.append({ 'word': word, 'years': years, |
