# Source: GitHub gist Zalasyu/694df422ecf50bf55c3eeeb0c5d195a8
# Created: December 19, 2021 23:12
# Gist description: Your instagram scraper methods in here.
# NOTE: GitHub flagged this file as containing hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than what appears below.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Author: Alec Moldovan
# Description: This module contains the logic for an instagram data scraper bot.
# Import Standard Libraries
import json
import os
import random
import time
from typing import Any, Callable

# Import third-party modules
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Logger Module
from loguru import logger
# logger.add("bot_{level}.log", level = info, rotation="00:00")

# Import Local Modules
import config
class Bot:
    """
    Interacts with instagram's pages through the webdriver.

    Attributes:
        driver -> the Selenium WebDriver every method drives.
    """

    # Scrolls the window to the bottom of the document and returns the
    # document height measured right after the scroll.
    _SCROLL_TO_BOTTOM_JS = (
        "window.scrollTo(0,document.body.scrollHeight);"
        "let lenPage=document.body.scrollHeight;return lenPage;"
    )

    def __init__(self, driver: "WebDriver") -> None:
        """Store the webdriver supplied by the caller.

        Parameters:
            driver: WebDriver -> browser session used for every interaction.
        Return:
            None
        """
        self.driver = driver

    def search_for_website(self, url: str) -> None:
        """Navigate the browser to the target webpage.

        Parameters:
            url: str -> URL for target webpage.
        Return:
            None
        """
        self.driver.get(url)

    def login(self) -> None:
        """Login to an instagram account.

        Opens the instagram home page, submits the login form with
        config.USERNAME / config.PASSWORD and then tries to dismiss the
        "save browser" and "turn on notifications" prompts. Every step is
        best-effort: a missing form or prompt is logged, not raised.

        Parameters: None
        Return: None
        """
        # Go to instagram login page
        logger.debug("Opening a google chrome browser and going to instagram login page")
        self.search_for_website("https://www.instagram.com/")

        # Fill out login form and click submit button
        try:
            self.wait_until(EC.presence_of_element_located((By.NAME, "username")))
            self._submit_login_form()
        except TimeoutException:
            # Keep going (the prompts below are still attempted), but leave a
            # trace instead of swallowing the timeout silently.
            logger.warning("Timed out while logging in; the login form may not have been submitted.")

        # Checkpoint # 1
        logger.info("Passed the login page.")

        # Go through popup messages
        # Skip remember this browser prompt
        logger.debug("Skipping save browser prompt.")
        self._dismiss_prompt(
            '//*[@id="react-root"]/section/main/div/div/div/section/div/button'
        )

        # Turn off notifications prompt
        logger.debug("Skipping turn on notifications prompt.")
        self._dismiss_prompt("/html/body/div[4]/div/div/div/div[3]/button[2]")

    def _submit_login_form(self) -> None:
        """Fill the username/password fields from config and click submit."""
        try:
            # Select username and password input text fields
            user_field = self.driver.find_element(By.NAME, "username")
            pass_field = self.driver.find_element(By.NAME, "password")

            # Clear fields
            user_field.clear()
            pass_field.clear()

            # Fill username and password text fields with the credentials
            user_field.send_keys(config.USERNAME)
            pass_field.send_keys(config.PASSWORD)

            # Find and click the submit button to login.
            logger.debug("Submitting login info!")
            self.driver.find_element(
                By.XPATH, '//*[@id="loginForm"]/div/div[3]/button/div'
            ).click()
        except NoSuchElementException:
            logger.exception("Could not find element!")

    def _dismiss_prompt(self, xpath: str) -> None:
        """Click the button at xpath if it becomes clickable before the timeout."""
        # This runs right after the login click, so wait for the button rather
        # than doing one immediate lookup that races the page load.
        try:
            self.wait_until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
        except (TimeoutException, NoSuchElementException):
            logger.debug("Prompt was not shown; nothing to dismiss.")

    def search(self, keyword: str = "emmalilywinery") -> None:
        """Search content by keyword with the instagram search box.

        Parameters:
            keyword: str -> text typed into the search box.
        Return: None
        """
        try:
            logger.debug("Locating Searchbox...")
            time.sleep(3)
            search_box = self.wait_until(
                EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Search']"))
            )
            search_box.clear()

            # Fill Search box and submit search.
            search_box.send_keys(keyword)
            time.sleep(3)
            # ENTER is sent twice with pauses in between.
            # NOTE(review): presumably the first ENTER selects the top
            # suggestion and the second opens it - confirm against the live UI.
            search_box.send_keys(Keys.ENTER)
            time.sleep(3)
            search_box.send_keys(Keys.ENTER)
            time.sleep(5)
        except TimeoutException:
            logger.exception("Search box not found!")

    def scroll(self, max_scrolls: int = 10, pause: float = 2) -> None:
        """Scroll down repeatedly so the profile webpage loads more posts.

        Scrolling stops as soon as the page height stops growing, or after
        max_scrolls additional scrolls, whichever comes first.

        Parameters:
            max_scrolls: int -> upper bound on scrolls after the initial one (default: 10)
            pause: float -> seconds to sleep before each scroll (default: 2)
        Return:
            None
        """
        logger.debug("Begin scrolling...")

        # Initial scroll; its resulting height is the baseline for comparison.
        page_height = self.driver.execute_script(self._SCROLL_TO_BOTTOM_JS)
        logger.debug(f'Length of page is {page_height}')

        # Keep scrolling until no more content
        for _ in range(max_scrolls):
            previous_height = page_height
            time.sleep(pause)
            page_height = self.driver.execute_script(self._SCROLL_TO_BOTTOM_JS)
            logger.debug(f'Length of last count is {previous_height}')
            logger.debug(f'Length of page is {page_height}')
            if page_height == previous_height:
                logger.info("Scrolled to bottom!")
                return

        # The cap was hit while the page was still growing, so do not claim
        # that the bottom was reached.
        logger.info(f"Stopped after {max_scrolls} scrolls; the page may have more content.")

    def open_and_switch(self, url: str) -> None:
        """Open link in a new tab and switch to that tab.

        NOTE: not implemented yet - calling this currently does nothing.

        Parameters:
            url: str --> hyperlink
        Return:
            None
        """
        pass

    def close_and_switch_back(self) -> None:
        """Close current tab and switch back to previous tab.

        NOTE: not implemented yet - calling this currently does nothing.

        Parameters: None
        Return:
            None
        """
        pass

    def wait_until(self, condition: Callable[["WebDriver"], Any], timeout: float = 5) -> Any:
        """
        Tell Webdriver to wait until a condition is met or timeout.

        Parameters:
            condition -> callable that receives the driver (e.g. an
                expected_conditions object); polled until it returns a
                truthy value.
            timeout -> seconds to wait (default: 5 secs)
        Return: the value returned by condition (e.g. the located element)
        Raises: TimeoutException when the condition is not met in time.
        """
        return WebDriverWait(self.driver, timeout).until(condition)
# (GitHub gist page footer, not part of the module: "Sign up for free to join
# this conversation on GitHub. Already have an account? Sign in to comment")