Last active
June 14, 2018 16:49
-
-
Save liuyigh/1dc809424ff9db6cd80d30ed95abaa10 to your computer and use it in GitHub Desktop.
Trialert: Notify You by Email When a Clinical Trial Is Updated
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
index | name | ctID | lastUpdate | |
---|---|---|---|---|
0 | DV281 | NCT03326752 | November 1, 2017 | |
1 | SD101 | NCT02521870 | December 7, 2017 | |
2 | AZD1419 | NCT02898662 | December 21, 2017 | |
3 | SD101+MK1966 | NCT02731742 | December 11, 2017 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lxml.html as lh | |
import requests #, sqlite | |
import pandas as pd | |
import sys, traceback | |
# Alert e-mail body (HTML). It starts as just the header; anything appended
# later means at least one trial changed.
email = 'Trialert:<br>'

# Tracked trials are kept in a CSV for now (sqlite in the future).
# The first CSV column becomes the DataFrame index.
trialDB = pd.read_csv('trialDB.csv', index_col=0)
# notify script error | |
def notifyException():
    """E-mail the traceback of the exception currently being handled.

    Intended to be called from inside an ``except`` block: the exception is
    read from ``sys.exc_info()``, formatted with ``traceback``, HTML-escaped,
    wrapped in ``<pre>`` and posted to the Mailgun messages API.

    Raises:
        requests.RequestException: if the Mailgun request cannot be made or
            does not answer within the timeout.
    """
    # Imported locally so the module-level name ``html`` (used elsewhere in
    # this script for the parsed page) cannot shadow the stdlib module.
    from html import escape

    exc_type, exc_value, exc_traceback = sys.exc_info()
    lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
    # Prefix every formatted traceback chunk so it stands out in the mail.
    pre = ''.join('!> ' + line for line in lines)
    # Escape &, < and > so the traceback text (e.g. "<module>") is shown
    # literally instead of being interpreted as HTML markup.
    body = '<pre>' + escape(pre, quote=False) + '</pre>'
    # post message with mailgun api
    # revise this according to your mailgun setup
    requests.post(
        "https://api.mailgun.net/v3/<your_domain>/messages",
        auth=("api", "<api_key>"),
        data={"from": "<your sending email>",
              "to": ["<receiving email>"],
              "subject": "trilert script error",
              "html": body},
        # Without a timeout a stalled connection would hang the script.
        timeout=30)
# scrape clinicaltrials.gov
# Browser-like HTTP request headers, listed as (name, value) pairs in the
# order they are stored in the resulting dict.
_hdr_items = [
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ('Accept-Encoding', 'none'),
    ('Accept-Language', 'en-US,en;q=0.8'),
    ('Connection', 'keep-alive'),
]
hdr = dict(_hdr_items)
del _hdr_items  # keep the module namespace identical to a plain literal
# Fetch each tracked trial's page, compare its "Last Update Posted" value
# with the stored one, and record any change in both `email` and `trialDB`.
try:
    for row in trialDB.itertuples():
        # The full body is parsed right away, so streaming is not needed; the
        # timeout keeps a stalled connection from hanging the script forever.
        r = requests.get('https://clinicaltrials.gov/ct2/show/{}'.format(row.ctID),
                         headers=hdr, timeout=30)
        # Report HTTP errors as such instead of trying to parse an error page.
        r.raise_for_status()
        html = lh.fromstring(r.content)
        # Text nodes of the parent of the 'Last Update Posted' <span>: [1]
        # picks the second node and [3:] drops its first three characters
        # (presumably newline/colon/space padding, confirm against the live
        # page; .strip('\n: ') is an alternative).
        lastUpdate = html.xpath("//span[@data-term='Last Update Posted']/../text()")[1][3:]
        if lastUpdate != row.lastUpdate:
            email += '===========<br>Trial '+ row.name +' updated on '+ lastUpdate +' :<br>https://clinicaltrials.gov/ct2/show/{}<br>'.format(row.ctID)
            trialDB.loc[row.Index, 'lastUpdate'] = lastUpdate
except Exception:
    # Top-level boundary: report any scraping/parsing failure by e-mail.
    # `Exception` (not a bare except) lets Ctrl-C and SystemExit propagate.
    notifyException()
# Write the table back to the same CSV, including any dates updated above.
trialDB.to_csv('trialDB.csv')
## if updated, add link to email; if email != init, send email as alert; update CSV with new info.
## `email` still equal to its initial value 'Trialert:<br>' means nothing
## changed, so no alert is sent.
## uncomment the next line to use it in a command line
# print(email)
if email != 'Trialert:<br>':
    # post message with mailgun api; revise according to your mailgun setup
    requests.post(
        "https://api.mailgun.net/v3/<your_domain>/messages",
        auth=("api", "<api_key>"),
        data={"from": "<your sending email>",
              "to": ["<receiving email>"],
              "subject": "Clinical Trial Alert",
              "html": email},
        # Without a timeout a stalled connection would hang the script.
        timeout=30)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment