rentals.ca scraper
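Two files: a Puppeteer cache config, and a small Node script that walks the paginated search results of a rentals.ca query, collects the listing URLs embedded in each page's HTML, and opens them one by one in the default browser.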
// Puppeteer config: store the downloaded browser inside the project instead of the user-level cache
const { join } = require("path");

module.exports = {
    cacheDirectory: join(__dirname, ".cache", "puppeteer"),
};
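Puppeteer picks this up when it is saved as a configuration file at the project root (e.g. `.puppeteerrc.cjs`); pointing `cacheDirectory` into the project keeps the downloaded browser binary next to the checkout instead of the default `~/.cache/puppeteer`. After changing it, the browser has to be (re)installed into the new location, e.g. with `npx puppeteer browsers install chrome`.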
import axios from 'axios'
import { assert } from 'console'
import open from 'open'

const main = async () => {
    const url = process.argv[2]
    // console's assert only logs on failure, it does not throw -- bail out explicitly
    assert(process.argv.length === 3, 'illegal number of arguments')
    assert(url, 'missing url as argument')
    if (!url) {
        process.exit(1)
    }

    const links = []
    let pageNum = 1
    while (true) {
        // walk the paginated results through the `p` query param
        // (assumes the passed url already carries a query string, hence the '&')
        const pageUrl = url + '&p=' + pageNum++
        const htmlStr = await axios.get(pageUrl).then((r) => r.data)

        // listing urls show up in the raw html as `"url": "https://rentals.ca/..."` pairs;
        // [^"]* keeps the match from running past the closing quote
        const regex = /"url": "(https:\/\/rentals\.ca\/[^"]*)"/g
        const matches = [...htmlStr.matchAll(regex)]

        // a page without any matches means we ran past the last result page
        if (matches.length === 0) {
            console.log('reached last page -- validate by checking this link: ' + pageUrl)
            break
        }

        // collect the urls, dropping duplicates across pages
        const foundUnique = []
        matches.map((m) => m[1]).forEach((f) => {
            if (!links.includes(f)) {
                foundUnique.push(f)
                links.push(f)
            }
        })
        console.log('found ' + foundUnique.length + ' links')
    }

    // drop links that are not listings: listing urls carry at least 4 slashes
    // (protocol plus city plus listing segment), shorter ones are nav or city pages
    const slashCount = (l) => (l.match(/\//g) || []).length
    const omitted = links.filter((l) => slashCount(l) < 4)
    const listings = links.filter((l) => slashCount(l) >= 4)
    console.log('omitted ' + omitted.length + " links that aren't listings: ", omitted)

    console.log('press enter key to open the ' + listings.length + ' scraped links')
    await new Promise((resolve) => process.stdin.once('data', resolve))
    for (const l of listings) {
        const lurl = new URL(l, url).href
        console.log('opening: ' + lurl)
        await open(lurl)
    }
    process.exit(0)
}

main()
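To run it (assuming the script is saved as `scrape.mjs`, with `axios` and `open` installed), pass a search URL that already has a query string, since pagination is appended as `&p=<n>` rather than `?p=<n>`:

node scrape.mjs 'https://rentals.ca/<city>?<some-filter>'

A regex over raw HTML is brittle. As a sketch under the assumption that rentals.ca embeds its listing URLs in application/ld+json <script> blocks (which the `"url":` pairs the regex matches suggest), the same extraction could be done with cheerio; the `listingUrls` helper name is hypothetical, not part of the gist.

// sketch: pull listing urls out of JSON-LD <script> blocks instead of regexing raw html
import axios from 'axios'
import * as cheerio from 'cheerio'

const listingUrls = async (pageUrl) => {
    const htmlStr = await axios.get(pageUrl).then((r) => r.data)
    const $ = cheerio.load(htmlStr)
    const urls = []
    $('script[type="application/ld+json"]').each((_, el) => {
        let data
        try {
            data = JSON.parse($(el).text())
        } catch {
            return // skip script blocks that aren't valid json
        }
        // a block may hold a single object or an array of objects
        const items = Array.isArray(data) ? data : [data]
        items.forEach((item) => {
            if (typeof item.url === 'string' && item.url.startsWith('https://rentals.ca/')) {
                urls.push(item.url)
            }
        })
    })
    return urls
}

Parsing the JSON-LD also gives access to the rest of each listing's structured data (price, address, and so on) instead of just the URL string.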