arshamalh · February 7, 2022 13:58
diff --git a/1_running_scrapy_from_anothor_python_file.py b/1_running_scrapy_from_anothor_python_file.py
 from scrapy import crawler
 from scrapy.utils.project import get_project_settings
 # pip install crochet
 from crochet import setup as crochet_setup, run_in_reactor


 # Initialise crochet before any @run_in_reactor function is called.
 # NOTE(review): crochet documents setup() as starting the Twisted reactor in a background thread - confirm.
 crochet_setup()
 # Runner configured from the Scrapy project settings; getMovieInfo uses it to start crawls.
 runner = crawler.CrawlerRunner(get_project_settings())

 @run_in_reactor
 def getMovieInfo():
     """Start a crawl of the spider named "spider_name" on the module-level runner.

     Returns whatever ``runner.crawl`` hands back. The call is wrapped by
     crochet's ``run_in_reactor`` decorator.
     NOTE(review): crochet documents that callers of a decorated function
     receive an EventualResult - confirm against the installed version.
     """
     return runner.crawl("spider_name")

diff --git a/2_1_custom_spider_arguments.py b/2_1_custom_spider_arguments.py
 # This file is a hint that accompanies the 2_scrapy_along_apscheduler_and_more_options.py file.
 # It shows how to pass custom arguments to the spider whenever we call it from another script.
 import scrapy

 class SpiderNameSpider(scrapy.Spider):
     """Spider registered as "spider_name" that records one custom keyword argument."""

     name = "spider_name"

     def __init__(self, **kwargs):
         """Pass every keyword argument to the base spider, then keep the custom one.

         ``custom_params`` ends up holding the value supplied as
         ``custom_arguments``, or ``None`` when that keyword was not given.
         """
         super().__init__(**kwargs)
         self.custom_params = kwargs.get("custom_arguments")
diff --git a/2_scrapy_along_apscheduler_and_more_options.py b/2_scrapy_along_apscheduler_and_more_options.py
 from datetime import datetime

 from pytz import utc
 from scrapy import crawler
 from scrapy.utils.project import get_project_settings
 from crochet import setup as crochet_setup, run_in_reactor
 from apscheduler.schedulers.blocking import BlockingScheduler

 # Initialise crochet before any @run_in_reactor function is called.
 # NOTE(review): crochet documents setup() as starting the Twisted reactor in a background thread - confirm.
 crochet_setup()
 # Runner configured from the Scrapy project settings; DoSomeScrapy uses it to start crawls.
 runner = crawler.CrawlerRunner(get_project_settings())
 # Scheduler configured with the UTC timezone; the job is added and the scheduler started further down.
 schedule = BlockingScheduler(timezone=utc)

 def doSomethingAfterScrape(_result=None):
     """Callback run when the crawl's Deferred fires.

     Twisted calls every callback with the Deferred's current result as the
     first positional argument, so this function has to accept one; with a
     zero-argument signature the call fails with ``TypeError`` and the
     message below is never printed.

     Args:
         _result: Value delivered by the Deferred. It is ignored; the
             default of ``None`` keeps direct calls without arguments working.
     """
     print("Scrappying finished.")

 @run_in_reactor
 def DoSomeScrapy():
     """Launch one crawl of "spider_name" and attach the completion callback.

     The spider is given ``custom_arguments="custom_values"``; the spider
     class has to accept that keyword itself. ``doSomethingAfterScrape`` is
     added as a callback on the object returned by ``runner.crawl``, and
     that same object is returned.
     """
     spider_kwargs = {"custom_arguments": "custom_values"}
     crawl_done = runner.crawl("spider_name", **spider_kwargs)
     crawl_done.addCallback(doSomethingAfterScrape)
     return crawl_done

 # Register DoSomeScrapy as a job on an 'interval' trigger with seconds=1000.
 schedule.add_job(
    DoSomeScrapy,
    'interval',
    next_run_time=datetime.now(tz=utc), # Optional argument: the current time in UTC.
    jitter=120,  # Jitter may be needed to randomize the scraping times; the value is 120 seconds.
    seconds=1000
 )

 # Start the scheduler.
 # NOTE(review): BlockingScheduler.start() is documented to block the calling thread - confirm.
 schedule.start()
	from scrapy import crawler
	from scrapy.utils.project import get_project_settings
	# pip install crochet
	from crochet import setup as crochet_setup, run_in_reactor


	# Initialise crochet before any @run_in_reactor function is called.
	# NOTE(review): crochet documents setup() as starting the Twisted reactor in a background thread - confirm.
	crochet_setup()
	# Runner configured from the Scrapy project settings; getMovieInfo uses it to start crawls.
	runner = crawler.CrawlerRunner(get_project_settings())

	@run_in_reactor
	def getMovieInfo():
	    """Start a crawl of the spider named "spider_name" on the module-level runner.

	    Returns whatever ``runner.crawl`` hands back. The call is wrapped by
	    crochet's ``run_in_reactor`` decorator.
	    NOTE(review): crochet documents that callers of a decorated function
	    receive an EventualResult - confirm against the installed version.
	    """
	    # The body must be indented under the def; at the def's own level it is an IndentationError.
	    deferred = runner.crawl("spider_name")
	    return deferred
	# This file is a hint that accompanies the 2_scrapy_along_apscheduler_and_more_options.py file.
	# It shows how to pass custom arguments to the spider whenever we call it from another script.
	import scrapy

	class SpiderNameSpider(scrapy.Spider):
	    """Spider registered as "spider_name" that records one custom keyword argument."""

	    # Class attribute: must live inside the class body, not beside the class statement.
	    name = "spider_name"

	    def __init__(self, **kwargs):
	        """Pass every keyword argument to the base spider, then keep the custom one.

	        ``custom_params`` ends up holding the value supplied as
	        ``custom_arguments``, or ``None`` when that keyword was not given.
	        """
	        super(SpiderNameSpider, self).__init__(**kwargs)
	        self.custom_params = kwargs.get("custom_arguments")
	from datetime import datetime

	from pytz import utc
	from scrapy import crawler
	from scrapy.utils.project import get_project_settings
	from crochet import setup as crochet_setup, run_in_reactor
	from apscheduler.schedulers.blocking import BlockingScheduler

	# Initialise crochet before any @run_in_reactor function is called.
	# NOTE(review): crochet documents setup() as starting the Twisted reactor in a background thread - confirm.
	crochet_setup()
	# Runner configured from the Scrapy project settings; DoSomeScrapy uses it to start crawls.
	runner = crawler.CrawlerRunner(get_project_settings())
	# Scheduler configured with the UTC timezone; the job is added and the scheduler started further down.
	schedule = BlockingScheduler(timezone=utc)

	def doSomethingAfterScrape(_result=None):
	    """Callback run when the crawl's Deferred fires.

	    Twisted calls every callback with the Deferred's current result as the
	    first positional argument, so this function has to accept one; with a
	    zero-argument signature the call fails with ``TypeError`` and the
	    message below is never printed.

	    Args:
	        _result: Value delivered by the Deferred. It is ignored; the
	            default of ``None`` keeps direct calls without arguments working.
	    """
	    # The body must be indented under the def; at the def's own level it is an IndentationError.
	    print("Scrappying finished.")

	@run_in_reactor
	def DoSomeScrapy():
	    """Launch one crawl of "spider_name" and attach the completion callback.

	    The spider is given ``custom_arguments="custom_values"``; the spider
	    class has to accept that keyword itself. ``doSomethingAfterScrape`` is
	    added as a callback on the object returned by ``runner.crawl``, and
	    that same object is returned.
	    """
	    # The body must be indented under the def; at the def's own level it is an IndentationError.
	    deferred = runner.crawl("spider_name", custom_arguments="custom_values")
	    deferred.addCallback(doSomethingAfterScrape)
	    return deferred

	# Register DoSomeScrapy as a job on an 'interval' trigger with seconds=1000.
	schedule.add_job(
	DoSomeScrapy,
	'interval',
	next_run_time=datetime.now(tz=utc), # Optional argument: the current time in UTC.
	jitter=120, # Jitter may be needed to randomize the scraping times; the value is 120 seconds.
	seconds=1000
	)

	# Start the scheduler.
	# NOTE(review): BlockingScheduler.start() is documented to block the calling thread - confirm.
	schedule.start()