Last active
December 21, 2015 06:48
-
-
Save geta6/6266566 to your computer and use it in GitHub Desktop.
アニメデータベースをパースします
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fs = require 'fs' | |
util = require 'util' | |
http = require 'http' | |
async = require 'async' | |
{Iconv} = require 'iconv' | |
cheerio = require 'cheerio' | |
multimeter = require 'multimeter' | |
# Progress-bar controller created from the current process object; used
# below via `multi.drop` to create a bar for batch scraping.
multi = multimeter process
# Scraper for a wiki hosted on wiki.livedoor.jp.
#
# Public methods:
#   titles(callback)       -- callback receives [{name, href}, ...] taken from
#                             the index page.
#   staffs(addr, callback) -- callback receives the parsed info for one page,
#                             or an array of parsed infos when `addr` is an array.
class AnimeWiki

  # Responses are treated as EUC-JP and converted to UTF-8 before parsing.
  iconv = new Iconv 'EUC-JP', 'UTF8//TRANSLIT//IGNORE'
  url = 'http://wiki.livedoor.jp'

  # GET `url + addr` and hand the decoded HTML string to `callback`.
  # Raw chunks are collected and joined with Buffer.concat (instead of the
  # deprecated `new Buffer string, 'binary'` round trip).
  # No 'error' listener is attached, so request failures are not reported
  # through `callback`.
  get = (addr, callback) ->
    http.get "#{url}#{addr}", (res) ->
      chunks = []
      res.on 'data', (chunk) -> chunks.push chunk
      res.on 'end', ->
        callback iconv.convert(Buffer.concat chunks).toString()

  # Turn the page text into a {key: [values...]} map.
  # Only lines containing ':' are considered. The text before the first ':'
  # becomes the key (with its first space/tab removed -- the regex has no /g
  # flag), and the text between the first and second ':' is split on
  # space/tab to form the value list.
  # NOTE(review): the duplicated ':' in the split character class may have
  # been a full-width colon in the original file -- confirm.
  parseFields = (text) ->
    fields = {}
    for line in text.split('\n') when /:/.test line
      [key, val] = line.split /[::]/
      key = key.replace /[ \t]/, ''
      fields[key] = val.split /[ \t]/
    fields

  constructor: ->

  # Fetch the index listing and collect the text and href of every
  # `.main th a` link.
  titles: (callback) ->
    get '/radioi_34/l/?order=name&on_desc=1', (html) ->
      $ = cheerio.load html
      pages = []
      $('.main th a').each (idx, el) ->
        link = $ el
        pages.push
          name: link.text()
          href: link.attr 'href'
      callback pages

  # Fetch and parse a single page.
  # `addr` is either a path string or an object with an `href` property
  # (as produced by `titles`). Calls `callback null, {title, info}`.
  staffParser = (addr, callback = ->) ->
    addr = addr.href if addr?.href?
    get addr, (html) ->
      $ = cheerio.load html
      callback null,
        title: $('#wikibody .title h1').text()
        info: parseFields $('#wikibody .main').text()

  # Parse one page, or a list of pages one after another.
  # For a list, a progress bar is shown and advanced after each page. The
  # counter lives in this call's closure, so a single-address call never
  # touches a bar that does not exist and repeated batches start from zero.
  staffs: (addr, callback) ->
    unless Array.isArray addr
      return staffParser addr, (err, info) -> callback info

    multi.drop (bar) ->
      done = 0
      step = (page, next) ->
        staffParser page, (err, info) ->
          # bar.percent expects a 0-100 value, not a raw page count.
          bar.percent Math.round(++done / addr.length * 100)
          next err, info
      async.mapSeries addr, step, (err, info) ->
        callback info
# Entry point: list every page from the index, scrape them all, then print
# the collected results.
aw = new AnimeWiki

# Pretty-print the scraped data, expanding nested objects up to 6 levels.
printResults = (info) ->
  console.log util.inspect(info, {depth: 6})

aw.titles (pages) ->
  aw.staffs pages, printResults
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http = require 'http' | |
async = require 'async' | |
{Iconv} = require 'iconv' | |
cheerio = require 'cheerio' | |
# Base URL of the wiki host; request paths are appended to it.
url = 'http://wiki.livedoor.jp'

# Converter applied to every response body: EUC-JP in, UTF-8 out.
iconv = new Iconv 'EUC-JP', 'UTF8//TRANSLIT//IGNORE'

# GET `url + addr` and pass the decoded HTML string to `callback`.
#
# addr     -- path (including query string) relative to `url`
# callback -- called once with the page's HTML after the response ends
#
# Raw chunks are collected and joined with Buffer.concat rather than being
# accumulated as a 'binary' string and rebuilt with the deprecated
# `new Buffer` constructor. No 'error' listener is attached, so request
# failures are not reported through `callback`.
httpget = (addr, callback) ->
  http.get "#{url}#{addr}", (res) ->
    chunks = []
    res.on 'data', (chunk) -> chunks.push chunk
    res.on 'end', ->
      callback iconv.convert(Buffer.concat chunks).toString()
# Fetch the index listing, then visit the linked pages one at a time.
# The worker reports an Error('break') after the first page, which makes
# async.eachSeries stop there; the completion handler then logs the error
# and exits with status 1.
httpget '/radioi_34/l/?order=name&on_desc=1', (listing) ->
  $ = cheerio.load listing
  pages = []
  $('.main th a').each (idx, anchor) ->
    pages.push $(anchor).attr('href')

  # Download one page, print its title and parsed fields, then stop the series.
  scrapePage = (page, next) ->
    httpget page, (body) ->
      $ = cheerio.load body
      text = $('#wikibody .main').text()
      title = $('#wikibody .title h1').text()
      kobe = {}
      # Only lines containing ':' carry a key/value pair.
      # NOTE(review): the character classes below look altered by text
      # extraction (possibly full-width colon / space originally) -- confirm.
      for line in text.split('\n')
        continue unless /:/.test line
        [key, val] = line.split /[::]/
        kobe[key.replace /[ ]/, ''] = val.split /[ ]/
      info = [
        title: title
        info: kobe
      ]
      console.log info
      next new Error 'break'

  # Runs once the series ends (here: right after the first page).
  finish = (err) ->
    console.error err if err
    process.exit 1

  async.eachSeries pages, scrapePage, finish
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment