Created
July 20, 2017 13:55
-
-
Save h-lame/e5ec2e5dcfa58abaccb693c28ed28f45 to your computer and use it in GitHub Desktop.
Benchmarking some alternate implementations for checking if a BOM is present in a string and removing it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'benchmark'

# You'll want this to be a largeish file that is representative of the kinds
# of CSS files you'll be using this on in the real world. I used a compiled
# version of application.css from https://github.com/alphagov/government-frontend
#
# The fixture is read in binary mode; the benchmarked methods are the ones
# that decide how (and whether) to re-tag it as UTF-8.
css = File.binread('bom.css')
# Alternative implementations for (a) detecting and (b) stripping a leading
# UTF-8 byte order mark (bytes EF BB BF) so they can be benchmarked against
# each other.
#
# Naming scheme:
# * +remove_bom_*+ return the input without its leading BOM.
#   The slice / char-index / pack variants drop the first character (or first
#   three bytes) unconditionally, so callers are expected to have confirmed a
#   BOM is present. The +sub+ variants only remove an actual leading BOM.
# * Variants without +dup+ in the name call +force_encoding+ on the caller's
#   string and therefore change its encoding tag in place.
# * +bom_string_*?+ return a truthy value when the input starts with a BOM.
class BomBench
  # Re-tags +input+ as UTF-8 and deletes its first character in place.
  #
  # @param input [String] mutated by this call
  # @return [String] the same +input+ object
  def remove_bom_force_encoding_slice!(input)
    input.force_encoding('UTF-8').slice!(0)
    input
  end

  # Re-tags +input+ as UTF-8 and returns a copy without the first character.
  #
  # @param input [String] encoding tag is changed in place
  # @return [String]
  def remove_bom_force_encoding_slice(input)
    input.force_encoding('UTF-8').slice(1..-1)
  end

  # Same as #remove_bom_force_encoding_slice but spelled with +[]+.
  #
  # @param input [String] encoding tag is changed in place
  # @return [String]
  def remove_bom_force_encoding_char_index(input)
    input.force_encoding('UTF-8')[1..-1]
  end

  # Drops the first three bytes via a byte array round trip and tags the
  # rebuilt string as UTF-8.
  #
  # @param input [String]
  # @return [String] a new string
  def remove_bom_pack(input)
    input.bytes[3..-1].pack('c*').force_encoding('UTF-8')
  end

  # Re-tags +input+ as UTF-8 and returns a copy with a leading BOM removed.
  #
  # @param input [String] encoding tag is changed in place
  # @return [String]
  def remove_bom_force_encoding_sub(input)
    # \A (not ^) so that only a BOM at the very start of the string matches.
    input.force_encoding('UTF-8').sub(/\A\xEF\xBB\xBF/, '')
  end

  # Re-tags +input+ as UTF-8 and removes a leading BOM in place.
  #
  # @param input [String] mutated by this call
  # @return [String] the same +input+ object, whether or not a BOM was found
  def remove_bom_force_encoding_sub!(input)
    utf8 = input.force_encoding('UTF-8')
    # sub! yields nil when nothing matched, so return the string explicitly.
    utf8.sub!(/\A\xEF\xBB\xBF/, '')
    utf8
  end

  # Copies +input+, tags the copy as UTF-8 and deletes its first character.
  #
  # @param input [String] left untouched
  # @return [String] the modified copy
  def remove_bom_dup_force_encoding_slice!(input)
    out = input.dup.force_encoding('UTF-8')
    out.slice!(0)
    out
  end

  # Copies +input+, tags the copy as UTF-8 and returns it minus the first
  # character.
  #
  # @param input [String] left untouched
  # @return [String]
  def remove_bom_dup_force_encoding_slice(input)
    # slice is non-mutating: its result, not the copy, is the answer.
    input.dup.force_encoding('UTF-8').slice(1..-1)
  end

  # Same as #remove_bom_dup_force_encoding_slice but spelled with +[]+.
  #
  # @param input [String] left untouched
  # @return [String]
  def remove_bom_dup_force_encoding_char_index(input)
    input.dup.force_encoding('UTF-8')[1..-1]
  end

  # Copies +input+, tags the copy as UTF-8 and returns it with a leading BOM
  # removed.
  #
  # @param input [String] left untouched
  # @return [String]
  def remove_bom_dup_force_encoding_sub(input)
    input.dup.force_encoding('UTF-8').sub(/\A\xEF\xBB\xBF/, '')
  end

  # Copies +input+, tags the copy as UTF-8 and removes a leading BOM from the
  # copy in place.
  #
  # @param input [String] left untouched
  # @return [String] the copy, whether or not a BOM was found
  def remove_bom_dup_force_encoding_sub!(input)
    out = input.dup.force_encoding('UTF-8')
    # sub! yields nil when nothing matched, so return the string explicitly.
    out.sub!(/\A\xEF\xBB\xBF/, '')
    out
  end

  # Regexp match against the whole string, temporarily re-tagged as UTF-8.
  #
  # @param input [String] encoding tag is restored before returning
  # @return [Integer, nil] 0 when a leading BOM is present, nil otherwise
  def bom_string_force_encoding_reset_full_regexp?(input)
    with_encoding_force_encoding_reset(input, 'UTF-8') { |utf_8_input| utf_8_input =~ /\A\xEF\xBB\xBF/ }
  end

  # Regexp match against a UTF-8 tagged copy of the whole string.
  #
  # @param input [String] left untouched
  # @return [Integer, nil] 0 when a leading BOM is present, nil otherwise
  def bom_string_dup_force_encoding_full_regexp?(input)
    with_encoding_dup_force_encoding(input, 'UTF-8') { |utf_8_input| utf_8_input =~ /\A\xEF\xBB\xBF/ }
  end

  # Regexp match against only the first three characters, with the string
  # temporarily re-tagged as UTF-8.
  #
  # @param input [String] encoding tag is restored before returning
  # @return [Integer, nil] 0 when a leading BOM is present, nil otherwise
  def bom_string_force_encoding_reset_slice_regexp?(input)
    with_encoding_force_encoding_reset(input, 'UTF-8') { |utf_8_input| utf_8_input[0..2] =~ /\A\xEF\xBB\xBF/ }
  end

  # Regexp match against only the first three characters of a UTF-8 tagged
  # copy.
  #
  # @param input [String] left untouched
  # @return [Integer, nil] 0 when a leading BOM is present, nil otherwise
  def bom_string_dup_force_encoding_slice_regexp?(input)
    with_encoding_dup_force_encoding(input, 'UTF-8') { |utf_8_input| utf_8_input[0..2] =~ /\A\xEF\xBB\xBF/ }
  end

  # Compares the first character with the BOM, with the string temporarily
  # re-tagged as UTF-8.
  #
  # @param input [String] encoding tag is restored before returning
  # @return [Boolean]
  def bom_string_force_encoding_reset_slice_equality?(input)
    with_encoding_force_encoding_reset(input, 'UTF-8') { |utf_8_input| utf_8_input[0] == "\xEF\xBB\xBF" }
  end

  # Compares the first character of a UTF-8 tagged copy with the BOM.
  #
  # @param input [String] left untouched
  # @return [Boolean]
  def bom_string_dup_force_encoding_slice_equality?(input)
    with_encoding_dup_force_encoding(input, 'UTF-8') { |utf_8_input| utf_8_input[0] == "\xEF\xBB\xBF" }
  end

  # Compares the first three raw bytes with EF BB BF; no encoding change.
  #
  # @param input [String] left untouched
  # @return [Boolean]
  def bom_string_byte_check?(input)
    with_encoding_byte_check(input, 'UTF-8') { |x| x.bytes[0..2] == [0xEF, 0xBB, 0xBF] }
  end

  # Yields +string+ re-tagged as +encoding+ and puts the original encoding
  # tag back afterwards, even if the block raises.
  #
  # @param string [String]
  # @param encoding [String, Encoding]
  # @return [Object] whatever the block returns
  def with_encoding_force_encoding_reset(string, encoding)
    old_encoding = string.encoding
    yield string.force_encoding(encoding)
  ensure
    string.force_encoding(old_encoding)
  end

  # Yields a copy of +string+ tagged as +encoding+; the original is not
  # modified.
  #
  # @param string [String]
  # @param encoding [String, Encoding]
  # @return [Object] whatever the block returns
  def with_encoding_dup_force_encoding(string, encoding)
    yield string.dup.force_encoding(encoding)
  end

  # Yields +string+ unchanged. The encoding argument is accepted only so the
  # signature lines up with the other +with_encoding_*+ helpers.
  #
  # @param string [String]
  # @param _encoding [String, Encoding] ignored
  # @return [Object] whatever the block returns
  def with_encoding_byte_check(string, _encoding)
    yield string
  end
end
# Iterations per report.
how_many = 10_000
compressor = BomBench.new

# Report label => BomBench method that checks for a BOM.
check_candidates = {
  'check: bytes' => :bom_string_byte_check?,
  'check: dup + force_encoding + slice equality' => :bom_string_dup_force_encoding_slice_equality?,
  'check: dup + force_encoding + slice regexp' => :bom_string_dup_force_encoding_slice_regexp?,
  'check: dup + force_encoding + full regexp' => :bom_string_dup_force_encoding_full_regexp?,
  'check: force_encoding + reset + slice equality' => :bom_string_force_encoding_reset_slice_equality?,
  'check: force_encoding + reset + slice regexp' => :bom_string_force_encoding_reset_slice_regexp?,
  'check: force_encoding + reset + full regexp' => :bom_string_force_encoding_reset_full_regexp?
}

# Report label => BomBench method that strips a BOM.
remove_candidates = {
  'remove bom: pack' => :remove_bom_pack,
  'remove bom: force encoding + slice!' => :remove_bom_force_encoding_slice!,
  'remove bom: force encoding + slice' => :remove_bom_force_encoding_slice,
  'remove bom: force encoding + char index' => :remove_bom_force_encoding_char_index,
  'remove bom: force encoding sub!' => :remove_bom_force_encoding_sub!,
  'remove bom: force encoding sub' => :remove_bom_force_encoding_sub,
  'remove bom: dup + force encoding + slice!' => :remove_bom_dup_force_encoding_slice!,
  'remove bom: dup + force encoding + slice' => :remove_bom_dup_force_encoding_slice,
  'remove bom: dup + force encoding + char index' => :remove_bom_dup_force_encoding_char_index,
  'remove bom: dup + force encoding sub!' => :remove_bom_dup_force_encoding_sub!,
  'remove bom: dup + force encoding sub' => :remove_bom_dup_force_encoding_sub
}

# One bmbm run (rehearsal + real pass) per group, in the order listed above.
[check_candidates, remove_candidates].each do |candidates|
  Benchmark.bmbm do |x|
    candidates.each do |label, method_name|
      # Every call gets its own copy of the fixture because several candidates
      # mutate or re-tag the string they are given.
      x.report(label) { how_many.times { compressor.public_send(method_name, css.dup) } }
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment