#!/usr/bin/ruby
#
# Crawls rel="me" links starting at start-url and draws them to relme.png.
#
# usage: ruby draw-relme-links.rb start-url
#
|
require "uri"

require "http"
require "nokogiri"
require "ruby-graphviz"
|
|
|
# Cleans up a raw URL string taken from the command line or an href attribute.
#
# The text is converted to ASCII and surrounding whitespace (spaces, tabs,
# newlines) is removed, since it is never part of the URL itself.
#
# @param url [String, nil] raw URL text
# @return [String, nil] the trimmed, ASCII-encoded URL, or nil when +url+ is
#   nil or contains nothing but whitespace
# @raise [EncodingError] when +url+ cannot be converted to ASCII
def normalized_url(url)
  return nil unless url

  cleaned = url.encode("ASCII").strip
  cleaned.empty? ? nil : cleaned
end
|
|
|
# Shortens a URL into a compact, two-line graph label.
#
# The leading http:// or https:// (matched case-insensitively) and one
# trailing slash are dropped, then a line break is inserted before the first
# remaining slash so that host and path end up on separate lines.
#
# @param url [String] URL text
# @return [String] the label
def compact_url(url)
  url
    .sub(%r{\Ahttps?://}i, "")
    .sub(%r{/\z}, "")
    .sub(%r{/}, "\n/")
    .chomp
end
|
|
|
# Handler object that supplies the custom XPath function `nokogiri:relme(...)`
# used by the link query.
xpath_handler = Class.new {
  # Keeps the nodes whose rel attribute lists the "me" link type.
  #
  # rel holds whitespace-separated tokens, so whole tokens are compared
  # (ignoring case). A word-boundary regexp would also accept values such as
  # "not-me", where "me" is only a fragment of another token.
  #
  # node_set - enumerable of nodes answering to node['rel']
  # Returns an Array with the matching nodes.
  def relme(node_set)
    node_set.select { |node|
      node['rel'].to_s.split.any? { |token| token.casecmp?("me") }
    }
  end
}.new
|
# Parses an HTML document and returns its rel="me" links.
#
# body    - HTML source, handed to Nokogiri::HTML5 unchanged
# handler - object providing the `relme` XPath function used by the query
#
# Returns the result of the XPath query: the <a> and <link> elements that
# the handler's `relme` function accepts.
def relme_links(body, handler)
  document = Nokogiri::HTML5(body)
  document.xpath('//a[nokogiri:relme(.)]|//link[nokogiri:relme(.)]', handler)
end
|
|
|
# The start URL is the first command-line argument; without one, print the
# usage line on stderr and exit with status 1.
url = ARGV.shift
unless url
  $stderr.puts "usage: bundle exec ruby #{$0} https://example.com/your-profile"
  exit 1
end
|
|
|
# Crawl the rel="me" graph breadth-first, starting from the URL given on the
# command line.
#   urls  - queue of pages (URI objects) still waiting to be fetched
#   links - maps each fetched page (URI) to the rel="me" targets (URIs) found on it
urls = [URI.parse(normalized_url(url))]
links = {}

until urls.empty?
  src = urls.shift
  $stderr.puts "Checking #{src}"

  begin
    res = HTTP.headers(:accept => "text/html").get(src)
    anchors = relme_links(res.body, xpath_handler)
  rescue HTTP::Error => e
    # One unreachable page must not abort the whole crawl; it is recorded as
    # a page without outgoing links.
    $stderr.puts "Skipping #{src}: #{e.message}"
    anchors = []
  end

  targets = anchors.filter_map { |link|
    begin
      href = normalized_url(link['href'])
      next unless href # rel="me" element without a usable href

      dst = URI.parse(href)
      # Relative links are resolved against the page they were found on.
      dst.absolute? ? dst : src.merge(dst.to_s)
    rescue URI::Error, EncodingError => e
      # A single malformed link is reported and dropped, not fatal.
      $stderr.puts "Ignoring link #{link['href'].inspect} on #{src}: #{e.message}"
      nil
    end
  }.uniq

  links[src] = targets

  # Only http(s) targets can be fetched; other schemes (mailto:, xmpp:, ...)
  # stay in the graph as targets but are never queued.
  crawlable = targets.select { |dst| %w[http https].include?(dst.scheme&.downcase) }
  # Queue pages that are neither waiting already nor fetched before.
  urls += crawlable - urls - links.keys
end
|
|
|
# Print a reverse index on stdout: every link target, followed by the pages
# that link to it (one per tab-indented line). Targets and pages are both
# ordered by their URL text.
sorted_sources = links.keys.sort_by(&:to_s)
links.values.flatten.sort_by(&:to_s).uniq.each do |dst|
  puts "#{dst} <-"
  sorted_sources.each do |src|
    puts "\t#{src}" if links[src].include?(dst)
  end
end
|
|
|
# Translate the crawl result into graph data keyed by display label.
#   nodes - every label (pages and targets), each listed once
#   edges - page label => unique target labels
# Different URLs can share one label (compact_url drops the scheme and a
# trailing slash), so target lists are merged per label.
nodes = []
edges = Hash.new { |h, k| h[k] = [] }
links.each do |src, dsts|
  sc = compact_url(src.to_s)
  dcs = dsts.map { |dst| compact_url(dst.to_s) }
  nodes << sc
  nodes.concat(dcs)
  edges[sc].concat(dcs)
end

nodes.uniq!
edges.each_value(&:uniq!)
|
|
|
# Render the graph to relme.png: one node per label, one edge per link.
g = GraphViz.new(:G, :type => :digraph)
gn = nodes.map { |label| [label, g.add_nodes(label)] }.to_h

# Dump the edge table on stdout before it is modified below.
require 'pp'
pp edges

edges.keys.each do |src|
  # Walk a copy of the target list: for a self-link (dst == src) the delete
  # below removes an entry from this very list, which would otherwise make
  # the iteration skip the following target.
  edges[src].dup.each do |dst|
    e = g.add_edges(gn[src], gn[dst])
    if edges[dst].include?(src)
      # Mutual link: draw a single double-headed green edge and drop the
      # reverse entry so it is not drawn a second time.
      edges[dst].delete(src)
      e.set do |_e|
        _e.dir = "both"
        _e.color = "forestgreen"
      end
    else
      # One-way link.
      e.set do |_e|
        _e.color = "darkgray"
      end
    end
  end
end

g.output(:png => "relme.png")