Created
April 24, 2017 12:53
-
-
Save stefanthaler/f0b13dc3a320d0e7df1adb40505eaf25 to your computer and use it in GitHub Desktop.
Checks a LaTeX .bib file for links. If the script finds links, it tries to reach each one to see whether it is still alive.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# needs bibtexparser==0.6.2 and requests (argparse is part of the standard library)
""" | |
Parses a Bib file and checks if all links are available | |
""" | |
# imports | |
import bibtexparser | |
import argparse | |
import re | |
import requests | |
# command-line interface: one optional flag naming the bib file to scan
parser = argparse.ArgumentParser(
    description="Parses a Bib file and checks if all links are available",
)
parser.add_argument(
    "-bf",
    "--bib_file",
    default="references.bib",
    type=str,
    help="The bib file to check",
)
# parsed once at start-up; the rest of the script reads args.bib_file
args = parser.parse_args()
# check url | |
def verify_url(url, bib_entry_id, key, value, print_only_error=False, timeout=10):
    """Send a HEAD request to *url* and print a short report about the result.

    Args:
        url: The address to probe.
        bib_entry_id: Identifier of the bib entry, used as the report heading.
        key: Name of the bib field the link was taken from.
        value: Raw field value, printed unchanged next to ``key``.
        print_only_error: When true, nothing is printed for links whose
            response is OK.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response's ``ok`` flag, or ``False`` when the request could not
        be completed at all (connection failure, timeout, malformed URL).
    """
    try:
        # Without a timeout a single unresponsive host would stall the scan.
        r = requests.head(url, timeout=timeout)
    except requests.RequestException as err:
        # A link we cannot even connect to is reported as dead instead of
        # aborting the check of every remaining entry.
        print("\n{} DOI ({})".format(bib_entry_id, "ERROR"))
        print("\t{}: {}".format(key, value))
        print("\tStatus: (request failed: {})".format(err))
        return False

    if print_only_error and r.ok:
        return r.ok  # guard clause: stay quiet about healthy links

    status = "OK" if r.ok else "ERROR"
    print("\n{} DOI ({})".format(bib_entry_id, status))
    print("\t{}: {}".format(key, value))
    print("\tStatus: (OK={status},Code:{status_code}, is_redirect:{redirect}) ".format(status=r.ok, status_code=r.status_code, redirect=r.is_redirect))
    return r.ok
# walk through bib file and verify links
LINKS_NOT_FOUND = []

# Parse first, then release the file handle before the (slow) network checks.
with open(args.bib_file) as bibtex_file:
    db = bibtexparser.load(bibtex_file)

for entry_key, bib_entry in db.entries_dict.items():
    for k, v in bib_entry.items():
        if re.match(r"^http.?://.+?$", v):
            # Any field holding a full URL is checked as-is. This includes a
            # doi field that already contains a complete link, which must
            # not be prefixed with the resolver a second time.
            if not verify_url(v, entry_key, k, v):
                LINKS_NOT_FOUND.append(entry_key)
        elif k == "doi":
            # Bare DOI: strip LaTeX braces/backslashes and resolve via doi.org.
            cleaned_doi = re.sub(r"[\{\}\\]", "", v)
            doi_url = "https://doi.org/" + cleaned_doi
            if not verify_url(doi_url, entry_key, k, v):
                LINKS_NOT_FOUND.append(entry_key)

# De-duplicate first and sort afterwards so the summary order is stable.
LINKS_NOT_FOUND = sorted(set(LINKS_NOT_FOUND))
if not LINKS_NOT_FOUND:
    print("Everything seems to be ok.")
else:
    print("'{}' contain(s) invalid links.".format(",".join(LINKS_NOT_FOUND)))
print("Done. Bye Bye.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment