diff options
Diffstat (limited to 's2-pdf-report.py')
| -rw-r--r-- | s2-pdf-report.py | 44 |
1 files changed, 1 insertions, 43 deletions
diff --git a/s2-pdf-report.py b/s2-pdf-report.py index d659ed15..0748897f 100644 --- a/s2-pdf-report.py +++ b/s2-pdf-report.py @@ -22,10 +22,9 @@ def s2_pdf_report(): addresses = AddressBook() for fn in glob.iglob('{}/**/*.txt'.format(PDF_DIR), recursive=True): paper_id = fn.replace(PDF_DIR, '').split('/')[2] - paper = load_paper(paper_id) total_count += 1 # print(paper_id) - headings, found_abstract = read_headings(fn, paper) + headings, found_abstract = read_headings(fn) heading_string = '\n'.join(headings[0:20]) found_addresses = [] if not found_abstract: @@ -67,47 +66,6 @@ def s2_pdf_report(): def percent(a,b): return round(100 * a / b) -def read_headings(fn, paper): - headings = [] - found_abstract = False - found_authors = [] - journal = paper.journal.lower() - authors = [ (a[0], a[1], a[1].lower(),) for a in paper.authors ] - with open(fn, 'r') as f: - for line in f.readlines(): - line = re.sub(r"\S*@\S*\s?", '', line) - l = line.lower().strip() - if len(l) < 5: - continue - if line[0] == 'a' or line[0] == 'b' or line[0] == 'c' or line[0] == '1' or line[0] == '2' or line[0] == '3' or line[0] == '4': - line = line[1:] - line = line.strip("∗†‡") - line = line.replace("fl", "fl").replace('ff', 'ff').replace('ffi', 'ffi').replace('ffl', 'ffl') - line = line.strip() - if 'abstract' in l: - found_abstract = True - break - if journal and journal in l: - continue - names = [s.strip() for s in re.split(',| and ', l)] - was_found = False - for name in names: - found = find_authors(authors, name) - if found: - was_found = True - # print("found {}".format(found[1])) - if found[0]: - found_authors.append(found) - continue - headings.append(line.strip()) - return headings, found_abstract - -def find_authors(authors, line): - for a in authors: - if a[2] in line: - return a - return None - def paper_path(paper_id): return '{}/{}/{}'.format(PDF_DIR, paper_id[0:2], paper_id) |
