Created
January 22, 2019 09:42
-
-
Save jrainlau/55a46bd0d3318e948331dc81e3d22d32 to your computer and use it in GitHub Desktop.
wallpapers爬虫
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer') | |
const fs = require('fs') | |
/**
 * Category slugs to crawl. Each slug is interpolated verbatim into
 * `http://wallpaperswide.com/${cateName}-desktop-wallpapers/page/N`.
 *
 * The raw list repeats several slugs ('others', 'cars', 'movies', 'music',
 * 'avatar', 'batman', 'hitman', 'prince_of_persia', 'star_wars'). Because the
 * URL depends only on the slug, a repeated slug would re-crawl exactly the same
 * pages, so the list is de-duplicated through a Set (first occurrence wins,
 * order preserved).
 *
 * NOTE(review): "childrenren's_day" looks like a find/replace slip for
 * "children's_day" — confirm against the site before changing it, since the
 * slug is used as-is in the request URL.
 *
 * The array is intentionally left mutable: the crawl loop consumes it.
 */
const cateNames = Array.from(new Set([
  'aero', 'auroras', 'black', 'bokeh', 'colorful', 'creative', 'fresh', 'macro', 'patterns', 'rainbow',
  'vector_art', 'white', 'animals', 'birds', 'horses', 'insects', 'others', 'pets', 'reptiles_&_frogs',
  'sea', 'wild', 'architecture', 'army', 'artistic', '3d', 'abstract', 'anime', 'drawings', 'fantasy',
  'graffiti', 'grunge', 'sculpture', 'typography', 'urban', 'black_and_white', 'cartoons', 'bee_movie',
  'bolt', 'brave', 'cars', 'coraline', 'futurama', 'gnomeo_&_juliet', 'ice_age', 'incredibles',
  'kung_fu_panda', 'madagascar', 'monsters_inc', 'ninja_turtles', 'old_disney', 'open_season', 'others',
  'planet_51', 'ratatouille', 'shrek', 'south_park', 'tangled', 'the_princess_and_the_frog',
  'the_simpsons', 'tinker_bell', 'toy_story', 'up', 'walle', 'celebrities', 'models', 'movies', 'music',
  'charity', 'city', 'computers', 'android', 'firefox', 'hardware', 'linux', 'mac', 'nvidia', 'others',
  'vaio', 'web', 'windows', 'cute', 'elements', 'earth', 'fire', 'water', 'food_and_drink', 'funny',
  'games', 'age_of_empires', 'angry_birds', 'anno_1404', "assassin's_creed", 'avatar', 'batman',
  'battlefield', 'bioshock', 'brink', 'call_of_duty', 'chess', 'command_and_conquer', 'crysis',
  "dante's_inferno", 'darksiders', 'dead_space', 'destiny', 'deus_ex', 'devil_may_cry', 'diablo', 'dota',
  'dragon_age', 'driver', 'empire_total_war', 'fable', 'fallout', 'far_cry', 'fear', 'final_fantasy',
  'forza_motorsport', 'gears_of_war', 'ghost_recon', 'god_of_war', 'gran_turismo', 'grand_theft_auto',
  'guild_wars', 'half_life', 'halo', 'heavenly_sword', 'heroes', 'hitman', 'infamous', 'killzone',
  'l.a._noire', 'left_4_dead', 'machinarium', 'mario', 'mass_effect', 'medal_of_honor', 'metal_gear',
  'midnight_club', 'minecraft', "mirror's_edge", 'mortal_kombat', 'need_for_speed', 'other_games',
  'overlord', 'poker', 'portal', 'prince_of_persia', 'prototype', 'quake_wars', 'rayman',
  'rayman_raving_rabbids', 'red_dead_redemption', 'resident_evil', 'rift', 'rockstar_games',
  's.t.a.l.k.e.r.', 'splinter_cell', 'star_wars', 'starcraft', 'street_fighter', 'the_elder_scrolls',
  'the_witcher', 'thief', 'tom_clancy', 'tomb_raider', 'trine', 'two_worlds', 'uncharted',
  'valkyria_chronicles', 'warhammer', 'watch_dogs', 'world_of_warcraft', 'girls', 'holidays', 'birthday',
  "childrenren's_day", 'christmas', 'easter', "father's_day", 'halloween', 'independence_day',
  "mother's_day", 'new_year', "saint_patrick's_day", "valentine's_day", 'love', 'motors', 'airplane',
  'atv', 'cars', 'classic_cars', 'motorcycles', 'others', 'trains', 'movies', '28_weeks_later', '300',
  'alice_in_wonderland', 'angels_and_demons', 'avatar', 'batman', 'captain_america',
  'clash_of_the_titans', 'game_of_thrones', 'harry_potter', 'high_school_musical', 'hitman', 'iron_man',
  'king_kong', 'lost', 'man_of_steel', 'other_movies', 'oz_the_great_and_powerful',
  'pirates_of_the_caribbean', 'prince_of_persia', 'real_steel', 'robin_hood', 'sex_and_the_city',
  'snow_white_&_the_huntsman', 'spider-man', 'star_trek', 'star_wars', 'sucker_punch', 'the_avengers',
  'the_hobbit', 'the_incredible_hulk', 'thor', 'transformers', 'tron_legacy', 'twilight', 'watchmen',
  'x-men', 'music', 'nature', 'beach', 'desert', 'flowers', 'forests', 'lakes', 'landscape', 'mountains',
  'rivers', 'sun_&_sky', 'waterfalls', 'seasons', 'autumn', 'calendar', 'spring', 'summer', 'winter',
  'space', '2016_summer_olympics', 'baseball', 'basketball', 'biking', 'boxing', 'fitness', 'football',
  'formula_1', 'free_running', 'golf', 'motorcycle_racing', 'other_sports', 'parkour', 'skateboarding',
  'skiing', 'surfing', 'tennis', 'winter_olympic_games', 'wrestling', 'travel', 'africa', 'america',
  'antarctica', 'asia', 'europe', 'islands', 'maps', 'oceania', 'other', 'vintage'
]))
/**
 * Open one listing page and extract its wallpaper entries.
 *
 * @param {string} url - Listing page to load.
 * @param {object} page - Puppeteer page; `goto`, `waitForSelector` and `$$eval` are used.
 * @returns {Promise<Array<{thumb: string, name: string, downloadUrl: string, total: string}>>}
 *   One entry per thumbnail found in the last `.wallpapers` element. `total` is the
 *   page count read from the pagination bar (as text), or '1' when the page has
 *   no usable pagination bar.
 * @throws {Error} When the screen-resolution element is missing, because download
 *   URLs cannot be built without it.
 */
const crawler = async (url, page) => {
  console.log(`Dealing with "${url}"...`)
  await page.goto(url)
  await page.waitForSelector('.wallpapers')

  // The callback is executed inside the browser page, so it may only rely on
  // browser globals (`document`) and its own argument.
  return page.$$eval('.wallpapers', wallpaperNodeList => {
    // The page count is taken from the second-to-last child of the last
    // pagination bar. A listing without a pagination bar (or with too few
    // children) is treated as a single page instead of crashing.
    const paginationBlocks = document.querySelectorAll('.pagination')
    const lastPagination = paginationBlocks[paginationBlocks.length - 1]
    const pageLinks = lastPagination ? lastPagination.children : []
    const lastPageLink = pageLinks[pageLinks.length - 2]
    const pageAmount = lastPageLink ? lastPageLink.textContent.trim() : '1'

    const resolutionNode = document.querySelector('#header > div.screen-res > span:nth-child(4) > strong')
    if (!resolutionNode) {
      throw new Error('Screen resolution element not found; cannot build download URLs')
    }
    // Strip all whitespace so the value can be embedded in the download file name.
    const screenResolution = resolutionNode.textContent.replace(/\s+/g, '')

    // Only the last `.wallpapers` container is scraped.
    const onShowWallpapers = wallpaperNodeList[wallpaperNodeList.length - 1]
    return Array.from(onShowWallpapers.querySelectorAll('img')).map(img => {
      // Thumbnail URL minus host prefix and "-t1.jpg" suffix is the wallpaper name.
      const imgName = img.src.replace(/https?:\/\/hd\.wallpaperswide\.com\/thumbs\/|-t1\.jpg/g, '')
      return {
        thumb: img.src,
        name: imgName,
        downloadUrl: `http://wallpaperswide.com/download/${imgName}-wallpaper-${screenResolution}.jpg`,
        total: pageAmount
      }
    })
  })
}
/**
 * Crawl every listing page of one category, starting at `pageIndex`, and
 * persist the results into `./cate.json` keyed by page URL.
 *
 * Pages are visited in a single loop, each exactly once. (Recursing from
 * inside the paging loop would make every nested call walk the remaining
 * pages again, growing exponentially with the page count.)
 *
 * @param {string} cateName - Category slug used to build the listing URL.
 * @param {number} pageIndex - First page number to crawl.
 * @param {object} page - Puppeteer page handed through to `crawler`.
 * @returns {Promise<object>} The full contents of `./cate.json` after the
 *   category has been crawled (URL -> wallpaper entries).
 */
const autoRun = async (cateName, pageIndex, page) => {
  const cateFile = './cate.json'

  // Load previously stored results; a missing or empty file is a fresh start.
  let categories = {}
  try {
    const cateRaw = fs.readFileSync(cateFile, 'utf8')
    if (cateRaw.trim()) {
      categories = JSON.parse(cateRaw)
    }
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err
    }
  }

  let index = pageIndex
  // Always crawl the starting page; the real page count is learned from it.
  let pageTotal = pageIndex
  while (index <= pageTotal) {
    const url = `http://wallpaperswide.com/${cateName}-desktop-wallpapers/page/${index}`
    const wallpapers = await crawler(url, page)
    categories[url] = wallpapers
    // Write after every page so progress survives a crash mid-category.
    fs.writeFileSync(cateFile, JSON.stringify(categories, null, 2))

    // Each entry carries the page count reported by that page; keep the
    // largest valid value seen, and tolerate pages that yield no entries.
    const reportedTotal = wallpapers.length ? Number(wallpapers[0].total) : NaN
    if (Number.isFinite(reportedTotal)) {
      pageTotal = Math.max(pageTotal, reportedTotal)
    }
    index++
  }
  return categories
}
/**
 * Entry point: launch a headless browser, crawl every category in
 * `cateNames` one after another on a single page, then shut the browser down.
 *
 * `cateNames` is consumed (emptied) as categories are processed.
 *
 * @returns {Promise<void>}
 */
const run = async () => {
  const browser = await puppeteer.launch({
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  })
  try {
    const page = await browser.newPage()
    while (cateNames.length) {
      const cateName = cateNames.shift()
      await autoRun(cateName, 1, page)
    }
  } finally {
    // Close the browser even when a crawl fails, so no Chromium process is leaked.
    await browser.close()
  }
}

// Report failures instead of leaving an unhandled promise rejection.
run().catch(err => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment