summaryrefslogtreecommitdiff
path: root/cli/app/thesaurus/api.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2020-04-02 15:31:46 +0200
committer Jules Laplace <julescarbon@gmail.com> 2020-04-02 15:31:46 +0200
commit c14810fe9f663d46b5477088d06047fea66d1524 (patch)
tree 72eebe1589b71147f003ebb55c66a20eaf5bce6a /cli/app/thesaurus/api.py
parent 0b55e297d5088962fe8397903041c2b1737c7cdd (diff)
ability to skip words if you don't like the connection
Diffstat (limited to 'cli/app/thesaurus/api.py')
-rw-r--r--cli/app/thesaurus/api.py39
1 files changed, 33 insertions, 6 deletions
diff --git a/cli/app/thesaurus/api.py b/cli/app/thesaurus/api.py
index 98e0210..467d5fb 100644
--- a/cli/app/thesaurus/api.py
+++ b/cli/app/thesaurus/api.py
@@ -1,4 +1,6 @@
import os
+import re
+import time
import requests
from hashlib import sha256
@@ -13,10 +15,18 @@ class Thesaurus:
sha = sha256(word)
hash_path = os.path.join(base_path, sha[0:2])
os.makedirs(hash_path, exist_ok=True)
- path = os.path.join(hash_path, word + '.json')
+ clean_word = re.sub('[^0-9a-zA-Z]+', '*', word)
+ path = os.path.join(hash_path, clean_word + '.json')
if os.path.exists(path):
return read_json(path)
- data = api_fn(word)
+ data = None
+ while data is None:
+ try:
+ data = api_fn(word)
+ except Exception as e:
+ print("Got HTTP error, sleeping for 5 seconds")
+ time.sleep(5)
+ pass
write_json(path, data)
return data
@@ -34,6 +44,11 @@ class ThesaurusAPI:
}
def search(self, word):
+ word = word.split('<')[0]
+ word = word.split('/')[0]
+ word = word.replace('(', '').replace(')', '')
+ if len(word) < 1:
+ return { 'word': word, 'categories': [] }
query = {
'qsearch': word,
}
@@ -42,8 +57,18 @@ class ThesaurusAPI:
return []
data = resp.text
data = data.split('<div id="resultsTimelineData">')[1].split('</div>')[0]
- # print(data)
- rows = json.loads(data)
+ data = data.replace('<span class="oesc">', '')
+ data = data.replace('</span>', '')
+ try:
+ rows = json.loads(data)
+ except Exception as e:
+ print(f"Error loading JSON for {word}")
+ print(data)
+ # raise e
+ return {
+ 'word': word,
+ 'categories': [],
+ }
cats = []
for row in rows:
cat, years = row['popup'].split(']: ')
@@ -72,8 +97,10 @@ class ThesaurusAPI:
raw_words = raw.split('"><b>')[1:]
words = []
for word in raw_words:
- word, rest = word.split('</b>')
- years = word.split(' <span')[0].strip()
+ word_partz = word.split('</b>')
+ word = word_partz[0]
+ years = word_partz[1].split(' <span')[0].strip()
+ years = years.replace('\u2013', '-')
words.append({
'word': word,
'years': years,