Created
May 1, 2019 06:29
-
-
Save FSund/66c05796868935051fc9629395856508 to your computer and use it in GitHub Desktop.
Novelkeys/Gateron Ink web scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# sources:
# https://www.adventuresintechland.com/detect-when-a-webpage-changes-with-python/
# https://chrisalbon.com/python/web_scraping/monitor_a_website/
import hashlib
import random
import socket
import time
import urllib  # use built-in urllib
import urllib.error
import urllib.request

from bs4 import BeautifulSoup as soup
from pushbullet import Pushbullet
# URL of the product page to be scraped.
url = "https://novelkeys.xyz/collections/switches/products/gateron-ink-switches"

# Nominal time between checks, in seconds.
# Use 60 instead to check every minute.
sleeptime = 60 * 60  # every hour
# Pool of User-Agent strings to rotate through.
# This helps skirt a bit around servers that detect repeated requests from the
# same machine. It will not prevent your IP from getting banned, but it helps a
# bit by pretending to be different browsers and operating systems.
_USER_AGENTS = (
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.142 Safari/535.19',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:8.0.1) Gecko/20100101 Firefox/8.0.1',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19',
)


def _getUserAgent():
    """Return one User-Agent string picked uniformly at random from the pool.

    Returns:
        str: one of the entries of ``_USER_AGENTS``.
    """
    # random.choice tracks the pool size automatically, so adding or removing
    # an entry cannot leave a stale hard-coded index bound behind.
    return random.choice(_USER_AGENTS)
def _getSoup():
    """Download the page at ``url`` and return it parsed with BeautifulSoup.

    The request is sent with a randomly chosen User-Agent header and a
    30 second timeout.

    Returns:
        The parsed document (``html.parser`` backend).

    Raises:
        Network errors raised by the opener or while reading the response
        (for example ``urllib.error.URLError`` or a timeout) are not handled
        here and propagate to the caller.
    """
    user_agent = _getUserAgent()
    print("user agent:\n %s" % user_agent)

    opener = urllib.request.build_opener()
    opener.addheaders = [('User-agent', user_agent)]

    # The script polls forever, so close the response (and its socket)
    # deterministically instead of leaving it to the garbage collector.
    with opener.open(url, timeout=30) as response:  # 30 s timeout
        return soup(response, features="html.parser")
def _doTheThing(pb):
    """Check the product page once and push a link if the switches are in stock.

    Args:
        pb: notification client; its ``push_link(title, url)`` method is
            called when the product looks available.

    Returns:
        None. Known network errors are printed and swallowed so that the
        caller simply tries again on its next cycle.
    """
    try:
        web_soup = _getSoup()
    except (TimeoutError, socket.timeout, urllib.error.URLError) as err:
        # socket.timeout is listed explicitly: before Python 3.10 it is not
        # the same class as TimeoutError, and it is what a stalled read
        # raises while the response body is being parsed.
        print("Got known error, which will be ignored")
        print(err)
        return  # ignore the error, try again on the next cycle

    soldout = web_soup.find(name="div", attrs={'class': 'swatch-element silent-ink soldout'})
    available = web_soup.find(name="div", attrs={'class': 'swatch-element silent-ink available'})

    # NOTE: a page without the "soldout" swatch counts as available, so a
    # change in the page layout will also trigger a notification.
    if available or not soldout:
        print("Gateron Inks ARE AVAILABLE")
        pb.push_link("GATERON INKS ARE AVAILABLE", url)
    else:
        print("Gateron Inks are not available")
def doTheThingAndSleep(pb):
    """Run one availability check, then sleep for a randomised interval.

    Args:
        pb: notification client, passed through to ``_doTheThing``.
    """
    _doTheThing(pb)

    # Jitter the interval by a factor between 0.9 and 1.1 so the requests do
    # not arrive on a perfectly regular schedule.
    delay = sleeptime * random.uniform(0.9, 1.1)
    print("Sleeping for %d seconds" % delay)
    time.sleep(delay)
api_key = "Insert your Pushbullet API key here"
pb = Pushbullet(api_key)

while True:  # Run forever
    try:
        doTheThingAndSleep(pb)
    except Exception as err:
        # Last-chance notification so an unattended crash does not go
        # unnoticed, then re-raise with the original traceback; the raise
        # also ends the loop.
        pb.push_note("Your script crashed", str(err))
        raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment