Skip to content

Instantly share code, notes, and snippets.

@whitead
Last active December 1, 2024 11:16
Show Gist options
  • Save whitead/5aa3d2867af434e0222e4373e1c2534d to your computer and use it in GitHub Desktop.
Save whitead/5aa3d2867af434e0222e4373e1c2534d to your computer and use it in GitHub Desktop.
Compute number of self citations with Semantic Scholar
# License CC0
import httpx
async def analyze_self_citations(doi, client=None):
    """Print how often each author of a paper appears among its references.

    The paper is looked up by DOI through the Semantic Scholar Graph API.
    For every author of the paper, a reference counts as a self citation
    when at least one of the reference's authors matches that author by
    name (see ``_is_same_author``). The report is printed to stdout.

    Args:
        doi: DOI of the paper to analyse, e.g. ``"10.1038/s42256-024-00832-8"``.
        client: Optional object with an awaitable ``get(url, params=...)``
            method (such as an ``httpx.AsyncClient``). When given it is used
            as-is and is NOT closed here, so callers can reuse one client
            for several DOIs or preconfigure headers/timeouts. When omitted,
            a temporary ``httpx.AsyncClient`` is created and closed.

    Raises:
        ValueError: If the response contains no references (the raw
            response is printed first to help with debugging).
        Exception: Whatever ``response.raise_for_status()`` raises on an
            HTTP error status.
    """
    if client is None:
        async with httpx.AsyncClient() as own_client:
            paper = await _fetch_paper(own_client, doi)
    else:
        paper = await _fetch_paper(client, doi)

    # .get() so that a missing key is reported like an empty list instead of
    # surfacing as an unrelated KeyError.
    references = paper.get("references")
    if not references:
        print(paper)
        raise ValueError("Could not find references from Semantic Scholar")

    # Tokenize each paper author once instead of once per reference author.
    self_cites = {a["name"]: 0 for a in paper.get("authors") or []}
    author_tokens = {name: _name_tokens(name) for name in self_cites}

    for ref in references:
        # "authors" may be missing or null for poorly indexed references.
        ref_tokens = [
            _name_tokens(ra.get("name")) for ra in ref.get("authors") or []
        ]
        for name, own in author_tokens.items():
            if any(_is_same_author(own, other) for other in ref_tokens):
                self_cites[name] += 1

    total = len(references)  # non-zero: the empty case raised above
    print(f"Self citations in \"{paper['title']}\"")
    print(f" N = {total}")
    for name, count in self_cites.items():
        print(f" {name}: {count/total:.2%} self citation")


async def _fetch_paper(client, doi):
    """Fetch title, authors and reference authors for *doi* as parsed JSON."""
    response = await client.get(
        f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}",
        params={"fields": "title,authors,references.authors"},
    )
    response.raise_for_status()
    return response.json()


def _name_tokens(name):
    """Return the lower-cased words of *name*, dropping dots and initials."""
    # `name or ""` tolerates a null name; single-letter words are initials.
    words = (name or "").lower().replace(".", "").split()
    return {w for w in words if len(w) > 1}


def _is_same_author(author_tokens, ref_tokens):
    """Return True when every word of the reference name is in the author name.

    A reference name that reduces to no words (only initials, or empty) never
    matches: an empty set is a subset of everything and would otherwise count
    as a self citation for every author.
    """
    return bool(ref_tokens) and ref_tokens <= author_tokens
# Example usage. NOTE: a bare top-level `await` only works where an event loop
# is already running (IPython/Jupyter, or the `python -m asyncio` REPL). In a
# plain script, wrap the call as `asyncio.run(analyze_self_citations(...))`.
await analyze_self_citations("10.1038/s42256-024-00832-8")
# Output recorded from the call above:
# Self citations in "Augmenting large language models with chemistry tools"
# N = 114
# Andrés M Bran: 0.00% self citation
# Sam Cox: 0.88% self citation
# Oliver Schilter: 0.00% self citation
# Carlo Baldassari: 0.00% self citation
# Andrew D. White: 5.26% self citation
# P. Schwaller: 6.14% self citation
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment