diff options
Diffstat (limited to 'megapixels/app/utils/api_utils.py')
| -rw-r--r-- | megapixels/app/utils/api_utils.py | 155 |
1 files changed, 155 insertions, 0 deletions
# Thin clients for the Wikipedia and Google Knowledge Graph search APIs.
# Both return plain dicts whose keys are prefixed with `wp_` / `kg_`.
import json
import urllib
import urllib.parse
import urllib.request


class WikipediaAPI:
    """Look up the single best-matching English Wikipedia page for a query."""

    url_base = 'https://en.wikipedia.org/w/api.php'

    def _url_builder(self, q):
        """Return the MediaWiki API URL for a one-result prefix search on *q*."""
        # https://www.mediawiki.org/wiki/API%3aProperties#Info%3a_Parameters
        params = {
            'redirects': '',
            'ppprop': 'displaytitle',
            'prop': 'pageprops|pageimages|description',
            'generator': 'prefixsearch',
            'action': 'query',
            'format': 'json',
            'piprop': 'thumbnail',
            #'pithumbsize': 160,
            'pilimit': 1,
            'gpssearch': q,
            'gpsnamespace': 0,
            'gpslimit': 1,
        }
        return f'{self.url_base}?{urllib.parse.urlencode(params)}'

    def _parse_response(self, response, obj):
        """Copy the first page of a decoded API *response* into *obj*.

        *obj* is left untouched when the response has no pages or when the
        only page id is -1. Returns *obj*.
        """
        query = response.get('query') or {}
        pages = query.get('pages') or {}
        if not pages:
            return obj
        page_id = next(iter(pages))
        if int(page_id) == -1:
            return obj
        page = pages[page_id]
        # Read every value before writing so a malformed page cannot leave
        # obj half-populated.
        name = page['title']
        description = page.get('description', '')  # not always available
        obj['wp_name'] = name
        obj['wp_page_id'] = page_id
        obj['wp_description'] = description
        return obj

    def _api_search(self, url):
        """Fetch *url* and return a dict describing the matched page.

        The result always has `wp_description`, `wp_page_id`, `wp_name`
        (empty strings when nothing matched) and `wp_accessed`. On any
        failure `wp_accessed` is False and `wp_error` holds the error text.
        """
        obj = {
            'wp_description': '',
            'wp_page_id': '',
            'wp_name': '',
        }
        try:
            # `with` closes the HTTP response even if decoding fails.
            with urllib.request.urlopen(url) as resp:
                response = json.loads(resp.read())
            obj['wp_accessed'] = True
            self._parse_response(response, obj)
        except Exception as e:
            # Best-effort lookup: report the failure in the result instead of
            # raising. Stored as text, matching GoogleKnowledgeGraph.kg_error.
            obj['wp_error'] = str(e)
            obj['wp_accessed'] = False
        return obj

    def get_meta(self, query_obj):
        """Return Wikipedia metadata for `query_obj['query']`.

        If *query_obj* is already marked `wp_accessed`, it is returned
        unchanged and no request is made.
        """
        if query_obj.get('wp_accessed', False):
            return query_obj
        return self._api_search(self._url_builder(query_obj['query']))

    def search(self, q):
        """Search the Wikipedia API for the query string *q*."""
        return self._api_search(self._url_builder(q))


class GoogleKnowledgeGraph:
    """Look up a single `Person` entity in the Google Knowledge Graph."""

    url_kg_api = 'https://kgsearch.googleapis.com/v1/entities:search'

    def __init__(self, key):
        """Store the API *key* sent with every request."""
        self._api_key = key

    def _parse_kg_response(self, result_obj, response):
        """Write the `kg_*` fields for a decoded *response* into *result_obj*.

        Every field is always written (empty values when there is no result),
        `kg_error` is cleared and `kg_accessed` is set to True.
        Returns *result_obj*.
        """
        fields = {
            'kg_url': '',
            'kg_description': '',
            'kg_id': '',
            'kg_name': '',
            'kg_score': 0,
            'kg_bio': '',
            'kg_bio_url': '',
            'kg_image_url': '',
        }
        items = response.get('itemListElement', [])
        if items:
            item = items[0]
            # `or {}` keeps the lookups below safe when a section is absent.
            entity = item.get('result') or {}
            detail = entity.get('detailedDescription') or {}
            image = entity.get('image') or {}
            # The API reference places `url` inside `result`; the item-level
            # lookup is kept first for backward compatibility.
            fields['kg_url'] = item.get('url') or entity.get('url', '')
            fields['kg_description'] = entity.get('description', '')
            fields['kg_id'] = entity.get('@id', '').replace('kg:', '')
            fields['kg_name'] = entity.get('name', '')
            fields['kg_score'] = item.get('resultScore', 0.0)
            fields['kg_bio'] = detail.get('articleBody', '')
            fields['kg_bio_url'] = detail.get('url', '')
            fields['kg_image_url'] = image.get('contentUrl', '')
        result_obj.update(fields)
        # A successful request, with or without a match, clears any error
        # left behind by an earlier failed attempt.
        result_obj['kg_error'] = ''
        result_obj['kg_accessed'] = True
        return result_obj

    def _get_kg_meta(self, result_obj, params):
        """Query the API with *params* and merge the answer into *result_obj*.

        On a network, HTTP or decoding failure `kg_error` holds the error
        text and `kg_accessed` is False. Returns *result_obj*.
        """
        query = dict(params)  # do not mutate the caller's dict
        query['indent'] = True  # JSON indent
        query['key'] = self._api_key
        query['limit'] = 1
        # Restricts returned entities to those of the specified types.
        # For example, you can specify `Person` (as defined in
        # http://schema.org/Person) to restrict the results to entities
        # representing people. If multiple types are specified, returned
        # entities will contain one or more of these types.
        query['types'] = 'Person'
        # Enables prefix (initial substring) match against names and aliases
        # of entities. For example, a prefix `Jung` will match entities and
        # aliases such as `Jung`, `Jungle`, and `Jung-ho Kang`.
        query['prefix'] = False

        url = f'{self.url_kg_api}?{urllib.parse.urlencode(query)}'
        try:
            # `with` closes the HTTP response; decoding is inside the try so
            # a malformed body is reported the same way as a failed request.
            with urllib.request.urlopen(url) as resp:
                response = json.loads(resp.read())
        except Exception as e:
            result_obj['kg_error'] = str(e)
            result_obj['kg_accessed'] = False
            return result_obj
        return self._parse_kg_response(result_obj, response)

    def get_kg_from_name(self, obj):
        """Fill *obj* with Knowledge Graph data found by `obj['query']`.

        Returns *obj* unchanged if it is already marked `kg_accessed`.
        """
        if obj.get('kg_accessed', False):
            return obj
        return self._get_kg_meta(obj, {'query': obj['query']})

    def get_kg_from_kg_id(self, obj):
        """Fill *obj* with Knowledge Graph data found by `obj['kg_id']`.

        Returns *obj* unchanged if it is already marked `kg_accessed`.
        Raises KeyError if *obj* carries no id.
        """
        if obj.get('kg_accessed', False):
            return obj
        # `kg_id` is the key _parse_kg_response writes; the misspelled
        # `kg_ig` is still accepted for callers that relied on it.
        kg_id = obj['kg_id'] if 'kg_id' in obj else obj['kg_ig']
        return self._get_kg_meta(obj, {'ids': kg_id})
