Created
September 22, 2017 13:09
-
-
Save anonymous/c4e6acc384f8e13ccafd183a4be0228d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Get e-mail address from HepNames" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import urllib\n", | |
"import urllib.request\n", | |
"import re\n", | |
"\n", | |
"def to_query_string(s):\n", | |
" l = s.split(' ')\n", | |
" l = [urllib.parse.quote(p.lower()) for p in l]\n", | |
" return '+'.join(l)\n", | |
"\n", | |
"query_url = 'https://inspirehep.net/search?cc=HepNames&p=author+{}&sf=exactfirstauthor&so=a&rm=&rg=25&sc=0&of=hb'\n", | |
"\n", | |
"def read_url(name):\n", | |
" url = query_url.format(to_query_string(name))\n", | |
" with urllib.request.urlopen(url) as f:\n", | |
" s = f.read()\n", | |
" return s.decode('utf-8')\n", | |
"\n", | |
"def get_mail_from_string(s):\n", | |
" matches = re.findall(r\"mailto:([\\w\\.\\-\\@]+)\", s)\n", | |
" for match in matches:\n", | |
" if not 'inspire' in match and '@' in match:\n", | |
" return match\n", | |
" return ''\n", | |
"\n", | |
"def get_mail(name, include_name=True):\n", | |
" s = read_url(name)\n", | |
" mail = get_mail_from_string(s)\n", | |
" if not include_name:\n", | |
" return mail\n", | |
" else:\n", | |
" if not mail:\n", | |
" return mail\n", | |
" return '{} <{}>'.format(name, mail)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Demo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"names = \"\"\"Andrzej Buras\n", | |
"Matthias Neubert\n", | |
"Riccardo Barbieri\n", | |
"\"\"\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Andrzej Buras <[email protected]>\n", | |
"Matthias Neubert <[email protected]>\n", | |
"Riccardo Barbieri <[email protected]>\n" | |
] | |
} | |
], | |
"source": [ | |
"for name in names.splitlines():\n", | |
" print(get_mail(name))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment