Last active
February 17, 2018 13:13
-
-
Save benleb/9a50c697af00d6b8f93f6a152d4866e4 to your computer and use it in GitHub Desktop.
get all direct links from a soundcloud playlist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint no-await-in-loop: off, no-console: off */
// scrapecloud.js | 02/18
// get all direct links from soundcloud playlists
const puppeteer = require('puppeteer'); | |
// Root URL of the SoundCloud site; the playlist URL below is built from it.
const scBaseUrl = 'https://soundcloud.com';
// Playlist ("set") page that the script navigates to and scrapes.
const scPlaylistUrl = `${scBaseUrl}/alex-humenyuk/sets/street-workout`;
/**
 * Collect the `href` attribute of every track-title anchor in the document.
 *
 * Runs inside the browser page (it is handed to `page.evaluate`), so it must
 * stay self-contained and may only rely on browser globals such as `document`.
 *
 * @returns {Array<string|null>} One `href` value per matching anchor, in DOM order.
 */
function extractItems() {
  const anchors = document.querySelectorAll('a.trackItem__trackTitle.sc-link-dark.sc-font-light');
  const hrefs = [];
  for (const anchor of Array.from(anchors)) {
    hrefs.push(anchor.getAttribute('href'));
  }
  return hrefs;
}
/**
 * Scroll a page to the bottom repeatedly, re-extracting items after each
 * scroll, until at least `itemTargetCount` items have been collected or the
 * page stops growing.
 *
 * @param {object} page - Puppeteer-style page; must provide `evaluate` and `waitForFunction`.
 * @param {Function} extractItemsFunc - Passed to `page.evaluate`; its return value
 *   replaces the collected list on every pass.
 * @param {number} itemTargetCount - Stop once at least this many items were extracted.
 * @param {number} [scrollDelay=1000] - Milliseconds to pause after each successful scroll.
 * @returns {Promise<Array>} The most recent extraction result. It may hold fewer
 *   items than requested when the page stops growing or an error occurs.
 */
async function scrapePlaylist(page, extractItemsFunc, itemTargetCount, scrollDelay = 1000) {
  let items = [];
  try {
    while (items.length < itemTargetCount) {
      items = await page.evaluate(extractItemsFunc);
      // Enough items already: skip the extra scroll, which could otherwise
      // block until the wait below times out on a fully loaded page.
      if (items.length >= itemTargetCount) break;

      const previousHeight = await page.evaluate('document.body.scrollHeight');
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      try {
        await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
      } catch (err) {
        // A timeout means the page did not grow after scrolling, i.e. there is
        // presumably nothing left to load; that is a normal end, not an error.
        if (err && err.name === 'TimeoutError') break;
        throw err;
      }
      // Plain timer instead of `page.waitFor`, which is deprecated and absent
      // from newer Puppeteer releases.
      await new Promise((resolve) => { setTimeout(resolve, scrollDelay); });
    }
  } catch (e) {
    // Best effort: report the failure and return whatever was collected so far.
    console.error(e);
  }
  return items;
}
// Entry point: open the playlist in a headless browser, scrape the track
// links, print them as absolute URLs and always shut the browser down.
(async () => {
  // set up the browser and a page
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // setViewport is asynchronous; await it so the size applies before navigating.
    await page.setViewport({ width: 1280, height: 926 });
    await page.goto(scPlaylistUrl);

    // scroll and extract items from the page.
    const items = await scrapePlaylist(page, extractItems, 100);

    const trackLinks = items
      // anchors without an href yield null; they cannot be turned into a link
      .filter((item) => typeof item === 'string')
      .map((item) => {
        // Strip the trailing "?in…" query when present; hrefs without it are
        // used unchanged instead of crashing on a null match.
        const match = /^(\/.*)\?in.*/.exec(item);
        return `${scBaseUrl}${match ? match[1] : item}`;
      });

    // output
    console.log(trackLinks);
  } finally {
    // close browser even when navigation or scraping failed
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment