Paperpile shared folder download PDF crawler
# This is a snippet for downloading PDFs from Paperpile to your local computer.
# See https://forum.paperpile.com/t/download-multiple-pdfs-to-computer/2405/7 for details.
#
# Summary:
# 1. In your Paperpile account, create a shared folder and move the papers whose PDFs you want to download into it.
# 2. Set the shared folder link below and run the script. Install dependencies if needed:
#      pip3 install urllib3 -U
#      pip3 install beautifulsoup4 -U
#
# Tested with Python 3.9.5.
import os
import re
import urllib.request

from bs4 import BeautifulSoup

# Settings
shared_folder_url = ""  # URL of your Paperpile shared folder
save_dir = ""           # local directory to save the PDFs into (must already exist)
end_page = 1            # last page number of the shared folder listing

# Page 1 has no suffix; later pages are reached via "/2", "/3", ...
pages = [""]
for i in range(2, end_page + 1):
    pages.append("/" + str(i))
# Collect the PDF download links from every page of the shared folder.
all_urls = []
for page in pages:
    base_url = shared_folder_url + page
    req = urllib.request.urlopen(base_url)
    soup = BeautifulSoup(req, "html.parser",
                         from_encoding=req.info().get_param('charset'))
    for link in soup.find_all("a", href=re.compile("download")):
        print(link['href'])
        all_urls.append("https://paperpile.com" + link['href'])
# Download each PDF, naming the files 1.pdf, 2.pdf, ...
idx = 1
for pdf_link in all_urls:
    print(pdf_link)
    try:
        urllib.request.urlretrieve(pdf_link,
                                   os.path.join(save_dir, str(idx) + ".pdf"))
        idx += 1
    except Exception as ex:
        print(str(ex.__class__.__name__) + " - " + str(ex))
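
As a usage sketch, the three settings at the top would be filled in before running the script. The URL, directory, script filename, and page count below are hypothetical placeholders, not values from the original snippet:

# Example settings (hypothetical placeholder values):
shared_folder_url = "https://paperpile.com/shared/AbCdEf"  # link to your shared folder
save_dir = "/home/user/paperpile_pdfs"                     # existing local directory
end_page = 3                                               # listing spans pages 1..3
# Then run the script, e.g.:  python3 paperpile_pdf_crawler.py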