Last active
December 15, 2019 22:11
-
-
Save ZeroX-DG/5fadc377fb19f19fada40a6187006425 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer');
const firebase = require('firebase');

// Register the Firebase app exactly once: initializeApp is only called when
// no app has been registered yet. The credential values below are
// placeholders and must be replaced with the real project settings.
if (firebase.apps.length === 0) {
  firebase.initializeApp({
    apiKey: "xxxxxxxxxxxxxxxxxxxxx",
    authDomain: "xxxxxxxxxxxxxxxxxxxxxxxx",
    databaseURL: "xxxxxxxxxxxxxxxxxxxxx",
    projectId: "xxxxxxxxx",
    storageBucket: "xxxxxxxxxxxxxxxxxxxx",
    messagingSenderId: "xxxxxxxxxxxxxxx"
  });
}

// Database handle shared by the rest of the script.
const db = firebase.database();
/**
 * Script entry point.
 *
 * Walks the paginated cue-card listing on ielts-mentor.com, opens every card
 * linked from each listing page, extracts the card's question heading and its
 * list of cues, and stores them through saveToFirebase().
 *
 * The browser is always closed, even when a navigation or extraction step
 * throws, and any failure is reported with a non-zero exit code instead of
 * being left as an unhandled promise rejection.
 */
(async () => {
  const listingUrlPrefix = 'https://www.ielts-mentor.com/cue-card-sample?start=';
  const pageStart = 2;
  const maxPage = 31;
  const itemsPerPage = 20;
  // Index of the first link to process on the FIRST listing page only. Raise
  // it (together with pageStart) to resume a run that died part-way through a
  // page, e.g. after a navigation timeout. Later pages always start at 0 so
  // that resuming does not silently skip their leading links.
  const linkStart = 0;

  const browser = await puppeteer.launch({ headless: false, timeout: 0 });
  browser.on('disconnected', () => {
    console.log("disconnected !");
  });

  try {
    const page = await browser.newPage();

    for (let currentPage = pageStart; currentPage <= maxPage; currentPage++) {
      // The listing is addressed by item offset, not by page number.
      const start = (currentPage - 1) * itemsPerPage;
      const listingUrl = listingUrlPrefix + start;

      console.log(`-------- PAGE ${currentPage} ( ${listingUrl} ) --------`);
      console.log("[#] Getting links...");
      await page.goto(listingUrl, { waitUntil: 'networkidle2' });

      // Extract the list of links from the result page.
      const cueCardLinks = await page.evaluate(
        (selector) => [...document.querySelectorAll(selector)].map((anchor) => anchor.href),
        '#adminForm > table > tbody > tr > td.list-title > a'
      );
      console.log("[#] Done getting links\n");

      const firstLink = currentPage === pageStart ? linkStart : 0;
      for (let i = firstLink; i < cueCardLinks.length; i++) {
        const link = cueCardLinks[i];
        console.log("\n[*] Trying: " + link);
        await page.goto(link, { waitUntil: 'networkidle2' });

        // Runs inside the page. The question heading is tried as <h2> first
        // and then as <h3>. Missing elements are reported as null rather than
        // dereferenced, so one odd page cannot abort the whole run.
        const card = await page.evaluate(
          (headingSelectors, cueListSelector) => {
            const heading = headingSelectors
              .map((selector) => document.querySelector(selector))
              .find(Boolean);
            const cueList = document.querySelector(cueListSelector);
            return {
              question: heading ? heading.textContent.trim() : null,
              cues: cueList
                ? [...cueList.children].map((cue) => cue.textContent.trim())
                : null
            };
          },
          [
            "#main > article > h2:nth-child(5) > span",
            "#main > article > h3:nth-child(5) > span"
          ],
          "#main > article > ul:nth-child(8)"
        );

        if (card.question === null || card.cues === null) {
          console.warn("[!] Skipping (question or cue list not found): " + link);
          continue;
        }

        // Awaiting keeps writes ordered and surfaces a failed write here; it
        // is harmless if saveToFirebase returns a non-promise value.
        await saveToFirebase(card.question, card.cues);
      }
    }
  } finally {
    // Closing the browser also closes every page it owns.
    await browser.close();
  }
  console.log("------ DONE ------");
})().catch((err) => {
  console.error("[!] Scrape aborted:", err);
  process.exitCode = 1;
});
/**
 * Stores one cue card as a new child of `/questions`.
 *
 * The write is awaited before success is logged, so the log line is only
 * printed for data that was actually accepted, and a failed write rejects the
 * returned promise instead of becoming an unhandled rejection.
 *
 * Declared as a function so it is hoisted and callable from code that
 * appears earlier in the file.
 *
 * @param {string} question - The cue card's question text.
 * @param {string[]} cues - The cue lines belonging to the question.
 * @returns {Promise<string>} Resolves with the generated key of the new record.
 */
async function saveToFirebase(question, cues) {
  const newQuestionRef = db.ref('/questions').push();
  const newQuestionKey = newQuestionRef.key;

  await newQuestionRef.set({ question, cues });

  console.log("[#] Success => Id: " + newQuestionKey + " | Title: " + question);
  return newQuestionKey;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment