Last active
November 3, 2017 13:24
-
-
Save bouk/bc4b1f406c3d4e83d28ade3f1b18a4e0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Copyright Bouke van der Bijl | |
require 'nokogiri' | |
# This script interprets the HTTPS Everywhere rulesets and extracts a list of hosts which are eligible for a 'simple' redirect,
# e.g. where http://example.com needs to be redirected to https://example.com
# Reports whether +rule+ is the plain scheme upgrade: its `from` attribute is
# exactly "^http:" and its `to` attribute is exactly "https:".
#
# @param rule [#attributes] object whose `attributes` maps attribute names to
#   objects responding to `value`
# @return [Boolean] false when either attribute is absent or has another value
def is_simple?(rule)
  attrs = rule.attributes
  # Safe navigation: a rule missing `from`/`to` is treated as not simple
  # instead of raising NoMethodError on nil.
  attrs['from']&.value == '^http:' && attrs['to']&.value == 'https:'
end
# Walk every ruleset file matching rules/*.xml and print, one per line, each
# target host for which the plain "^http:" -> "https:" rule is the one that applies.
Dir["rules/*.xml"].each do |name|
  # Block form of File.open closes the handle as soon as parsing is finished.
  doc = File.open(name) { |file| Nokogiri::XML(file) }

  doc.xpath('//ruleset').each do |set|
    # Skip rulesets that are disabled by default or limited to the mixedcontent platform.
    next if set.attributes['default_off'] || set.attributes['platform']&.value == 'mixedcontent'
    # Skip rulesets that carry any exclusion.
    next if set.xpath('exclusion').any?

    rules = set.xpath('rule')
    if rules.count == 1
      # Single rule: every target qualifies when that rule is the simple one.
      next unless is_simple?(rules.first)

      set.xpath('target').each do |t|
        # A leading "*." is collapsed to "*" in the printed host.
        puts t.attributes['host'].value.sub(/\A\*\./, '*')
      end
    else
      set.xpath('target').each do |t|
        host = t.attributes['host'].value
        # Can't guarantee nothing breaks, might mean complicated regexes
        next if host.start_with?('*.')

        u = "http://#{host}/"
        # The first rule whose `from` pattern matches the host's root URL decides:
        # print the host only if that rule is the simple one.
        deciding_rule = rules.find do |rule|
          Regexp.new(rule.attributes['from'].value).match?(u)
        end
        puts host if deciding_rule && is_simple?(deciding_rule)
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment