Created
April 18, 2012 16:59
-
-
Save scragz/2415017 to your computer and use it in GitHub Desktop.
Probably a dumb way to fix ugly latin1 garbage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Repairs strings whose bytes are really Windows-1252 / Latin-1 but which were
# handed to us labelled as something else, transcoding them to UTF-8.
class Unicoder
  # Windows-1252 assignments for bytes 0x80-0x9F, the range where cp1252
  # differs from ISO-8859-1. Bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D are
  # unassigned in cp1252 and are deliberately absent.
  CP1252_HIGH = {
    0x80 => "\u20AC", # EURO SIGN
    0x82 => "\u201A", # SINGLE LOW-9 QUOTATION MARK
    0x83 => "\u0192", # LATIN SMALL LETTER F WITH HOOK
    0x84 => "\u201E", # DOUBLE LOW-9 QUOTATION MARK
    0x85 => "\u2026", # HORIZONTAL ELLIPSIS
    0x86 => "\u2020", # DAGGER
    0x87 => "\u2021", # DOUBLE DAGGER
    0x88 => "\u02C6", # MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89 => "\u2030", # PER MILLE SIGN
    0x8A => "\u0160", # LATIN CAPITAL LETTER S WITH CARON
    0x8B => "\u2039", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8C => "\u0152", # LATIN CAPITAL LIGATURE OE
    0x8E => "\u017D", # LATIN CAPITAL LETTER Z WITH CARON
    0x91 => "\u2018", # LEFT SINGLE QUOTATION MARK
    0x92 => "\u2019", # RIGHT SINGLE QUOTATION MARK
    0x93 => "\u201C", # LEFT DOUBLE QUOTATION MARK
    0x94 => "\u201D", # RIGHT DOUBLE QUOTATION MARK
    0x95 => "\u2022", # BULLET
    0x96 => "\u2013", # EN DASH
    0x97 => "\u2014", # EM DASH
    0x98 => "\u02DC", # SMALL TILDE
    0x99 => "\u2122", # TRADE MARK SIGN
    0x9A => "\u0161", # LATIN SMALL LETTER S WITH CARON
    0x9B => "\u203A", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9C => "\u0153", # LATIN SMALL LIGATURE OE
    0x9E => "\u017E", # LATIN SMALL LETTER Z WITH CARON
    0x9F => "\u0178"  # LATIN CAPITAL LETTER Y WITH DIAERESIS
  }.freeze

  # Emitted for bytes that have no cp1252 assignment (U+FFFD REPLACEMENT
  # CHARACTER), so one stray byte cannot abort the whole conversion.
  REPLACEMENT = "\uFFFD"

  # Single binary byte => UTF-8 character, used as the transcoding fallback.
  #
  # Keys are built with pack('C') so they are tagged ASCII-8BIT, the encoding
  # of the bytes String#encode hands to the fallback. A "\x80" literal would
  # be tagged with the source encoding instead and never match the lookup.
  CHAR_MAP = begin
    map = Hash.new(REPLACEMENT)
    CP1252_HIGH.each { |byte, char| map[[byte].pack('C')] = char }
    # 0xA0-0xFF: cp1252 agrees with Latin-1, whose bytes equal their code points.
    (0xA0..0xFF).each { |byte| map[[byte].pack('C')] = [byte].pack('U') }
    map.freeze
  end

  class << self
    # Converts +text+ to UTF-8 in place and returns it.
    #
    # The encoding label currently on +text+ is ignored; only its bytes
    # matter. Bytes that already form valid UTF-8 are kept and merely
    # relabelled. Otherwise every byte is decoded as Windows-1252, with
    # unassigned bytes becoming REPLACEMENT.
    #
    # @param text [String, nil] string to repair; mutated in place
    # @return [String, nil] the same object, now UTF-8; nil/false pass through
    def zap(text)
      return text unless text

      # Already well-formed UTF-8: decoding it byte-wise would corrupt it.
      return text if text.force_encoding(Encoding::UTF_8).valid_encoding?

      text.force_encoding(Encoding::BINARY).encode!(Encoding::UTF_8, fallback: CHAR_MAP)
    end
  end

  # Mangle all incoming parameters transcoding them to UTF-8
  class Middleware
    # @param app [#call] the next application in the middleware chain
    def initialize(app)
      @app = app
    end

    # Repairs the request parameters, then hands the request on to the app.
    #
    # NOTE(review): the fix reaches the downstream app only because values are
    # rewritten in place; presumably Rack returns the same String objects it
    # caches in +env+ -- confirm against the Rack version in use.
    #
    # @param env [Hash] the Rack environment
    # @return [Object] whatever the wrapped app returns
    def call(env)
      req = Rack::Request.new(env)
      mangle(req.params)
      @app.call(req.env)
    end

    # Walks a parameter hash and repairs, in place, every string value whose
    # bytes are invalid for its current encoding. Nested hashes and arrays
    # are traversed; keys and other value types are left untouched.
    #
    # @param hsh [Hash] parameters to repair
    # @return [Hash] +hsh+ itself
    def mangle(hsh)
      hsh.each_value { |value| mangle_value(value) }
    end

    private

    # Repairs a single parameter value, recursing into containers.
    def mangle_value(value)
      if value.respond_to?(:force_encoding)
        Unicoder.zap(value) unless value.valid_encoding?
      elsif value.is_a?(Hash)
        mangle(value)
      elsif value.is_a?(Array)
        value.each { |item| mangle_value(item) }
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment