-
-
Save chastell/d1866814d0ecfe7bcfdb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Input fixture: the text fed to every unindent implementation in the tests
# and the benchmark further down in this file.
# NOTE(review): the text describes varying indentation and blank lines, but none
# is visible in this copy — presumably lost in extraction; confirm against the
# original gist before relying on this fixture.
TEXT = <<EOF | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
The End. | |
EOF | |
# Expected fixture: the value each unindent implementation must produce from
# TEXT (compared with assert_equal in TestUnindent).
# NOTE(review): any relative indentation and blank lines this fixture should
# retain are not visible in this copy — presumably lost in extraction; confirm
# against the original gist.
EXPECTED_TEXT = <<EOF | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
The End. | |
EOF | |
require 'active_support/core_ext/string' | |
require 'minitest/autorun' | |
require 'minitest/pride' | |
require 'unindent' | |
# Three alternative implementations of "strip the common leading indent",
# added to String so they can be benchmarked against each other.
class String
  # Strips the smallest indent found on any line that has content.
  #
  # Tries to optimize by avoiding splitting into an array: a single #scan
  # finds the leading run of tabs/spaces on every line that contains a
  # non-whitespace character, and the shortest run is then removed from the
  # start of every line with #gsub.
  #
  # @return [String] the unindented copy, or the receiver itself when no
  #   line has any non-whitespace content
  def unindent_scan
    indent_str = nil
    scan(/^[\t ]*(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    # Nothing to strip: return the receiver rather than nil so callers
    # always get a String back (same as #unindent_offsets).
    return self unless indent_str

    gsub(/^#{indent_str}/, '')
  end

  # Strips the smallest indent found on any line that has content.
  #
  # This version tries to avoid a second traversal of the string by
  # saving a list of offsets. However, it winds up being
  # slower. Perhaps this is a sign of how well-optimized #gsub is?
  #
  # Only lines with non-whitespace content are recorded, so whitespace-only
  # lines are left untouched.
  #
  # @return [String] the unindented copy, or the receiver itself when no
  #   line has any non-whitespace content
  def unindent_offsets
    min_indent = nil
    offsets = []
    scan(/^[\t ]*(?=\S)/) do |s|
      offsets << Regexp.last_match.begin(0)
      # min_indent is an Integer width; compare against it directly
      # (Integer#size would be the byte width of the number, not the indent).
      min_indent = s.size if min_indent.nil? || s.size < min_indent
    end
    return self unless min_indent

    result = dup
    shift = 0
    offsets.each do |offset|
      # Earlier deletions moved every later line start left by `shift` chars.
      result[offset - shift, min_indent] = ''
      shift += min_indent
    end
    result
  end

  # Strips the smallest indent found on any line that has content.
  #
  # Splits into lines, takes the leading whitespace of every line that
  # contains a non-whitespace character, and removes the shortest one from
  # the start of every line. Whitespace-only lines are ignored when picking
  # the indent so that a short run of stray spaces cannot define it.
  #
  # @return [String] the unindented copy
  def unindent_by_min_dent
    dent = split("\n")
           .select { |line| line =~ /\S/ }
           .map { |line| line[/^\s*/] }
           .min_by(&:size)
    # dent is nil when no line has content; /^/ then matches without removing anything.
    gsub(/^#{dent}/, '')
  end
end
# Checks that every unindent implementation turns TEXT into EXPECTED_TEXT.
#
# Inherits from Minitest::Test: the legacy `MiniTest` constant is only a
# compatibility alias and is not defined by default in current minitest.
class TestUnindent < Minitest::Test
  # String#unindent — presumably supplied by the `unindent` gem required above.
  def test_unindent_gem
    assert_equal EXPECTED_TEXT, TEXT.unindent
  end

  # String#strip_heredoc — presumably supplied by active_support/core_ext/string.
  def test_activesupport
    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
  end

  # Scan-then-gsub implementation defined on String in this file.
  def test_scan
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
  end

  # Offset-recording implementation defined on String in this file.
  def test_offsets
    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
  end

  # Split/min_by implementation defined on String in this file.
  def test_by_min_dent
    assert_equal EXPECTED_TEXT, TEXT.unindent_by_min_dent
  end
end
require 'benchmark/ips' | |
# Compare iterations per second of each unindent implementation on TEXT.
Benchmark.ips do |bench|
  # Implementations called on String but not defined in this file.
  bench.report('unindent gem')  { TEXT.unindent }
  bench.report('activesupport') { TEXT.strip_heredoc }

  # Implementations defined on String in this file.
  bench.report('scan')          { TEXT.unindent_scan }
  bench.report('offsets')       { TEXT.unindent_offsets }
  bench.report('min dent')      { TEXT.unindent_by_min_dent }
end
# Calculating ------------------------------------- | |
# unindent gem 1041 i/100ms | |
# activesupport 1319 i/100ms | |
# scan 1605 i/100ms | |
# offsets 1157 i/100ms | |
# min dent 1892 i/100ms | |
# ------------------------------------------------- | |
# unindent gem 11316.3 (±0.7%) i/s - 57255 in 5.059733s | |
# activesupport 14493.9 (±0.6%) i/s - 72545 in 5.005421s | |
# scan 17936.8 (±0.7%) i/s - 89880 in 5.011144s | |
# offsets 12385.2 (±1.3%) i/s - 62478 in 5.045426s | |
# min dent 21625.3 (±1.1%) i/s - 109736 in 5.075009s | |
# Run options: --seed 1239 | |
# | |
# # Running: | |
# | |
# ..... | |
# | |
# Fabulous run in 0.001733s, 2885.9696 runs/s, 2885.9696 assertions/s. | |
# | |
# 5 runs, 5 assertions, 0 failures, 0 errors, 0 skips |
Hat tip to the great @sferik for introducing me to benchmark-ips
in his Writing Fast Ruby talk.
And now there’s this: https://gist.github.com/danielfone/eacaf4a1f1d7f2ad425f :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Please
gem install activesupport benchmark-ips minitest unindent
before running this. Numbers for MRI 2.1.2 (from ruby-install + chruby) on x86_64-linux – Ubuntu 14.04 on i7-3517U.