This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function pageFunction(context) { | |
// called on every page the crawler visits, use it to extract data from it | |
var $ = context.jQuery; | |
var result = []; | |
var content = $('meta[id="_bootstrap-layout-init"]').attr("content"); | |
var api_key = JSON.parse(content).api_config.key; | |
var listId = context.request.url.match(/[^\/]+$/g); | |
var getReviewData = function(offset) { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function pageFunction(context) { | |
var PAGES = 100; // each page has 40 reviews | |
// called on every page the crawler visits, use it to extract data from it | |
var $ = context.jQuery; | |
var result = []; | |
var extractData = function(page) { | |
if( page < PAGES ) { | |
var api = "https://play.google.com/store/getreviews?authuser=0"; | |
$.ajax({ | |
url: api, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function pageFunction(context) { | |
// called on every page the crawler visits, use it to extract data from it | |
var $ = context.jQuery; | |
if (context.request.label === 'start') { | |
context.skipOutput(); | |
var count = parseInt($('.count').text()); | |
for(var i=0; i<=count; i+=20) { | |
context.enqueuePage("http://www.topshop.com/webapp/wcs/stores/servlet/CatalogNavigationAjaxSearchResultCmd?storeId=12556&catalogId=33057&langId=-1&dimSelected=%2Fen%2Ftsuk%2Fcategory%2Fclothing-427%2FN-82zZdgl%3FNo%3D" + i + "%26Nrpp%3D20%26siteId%3D%252F12556%26categoryId%3D203984"); | |
} | |
} else { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function pageFunction(context) { | |
// called on every page the crawler visits, use it to extract data from it | |
var $ = context.jQuery; | |
if (context.request.label === 'detail') { | |
context.skipLinks(); | |
// return data from internal JS variable | |
return property; | |
} else { | |
context.skipOutput(); | |
// enqueue next pages in pagination |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// originally from https://kb.apify.com/tips-and-tricks/scraping-data-from-websites-using-schemaorg-microdata | |
function schemaOrgParser() { | |
var extractValue = function(elem) { | |
return $(elem).attr("content") || $(elem).text() | |
|| $(elem).attr("src") || $(elem).attr("href") || null; | |
}; | |
var addProperty = function(item,propName,value) { | |
if( typeof(value)==='string' ) | |
value = value.trim(); | |
if( Array.isArray(item[propName]) ) |