|
# Sample multi-line text used as input for TrimPredent and StripPredent at the
# bottom of this file. It is an expandable (double-quoted) here-string, so the
# embedded "quoted text" is kept literally and needs no escaping.
$longMessage = @"
|
|
This is a long message.
|
|
It has several lines.
|
|
Some are indented
|
|
more than others.
|
|
Some should start at the first column.
|
|
Some have "quoted text" in them.
|
|
"@
|
|
|
|
function TrimPredent {
    <#
    .SYNOPSIS
        Removes the common leading indentation ("predent") from a multi-line string.
    .DESCRIPTION
        Measures the horizontal whitespace in front of the first non-whitespace
        character of every non-blank line, takes the smallest width, and drops
        that many leading characters from each line with Substring.

        Trim*() is not used because it either needs an explicit character list
        or strips an unbounded amount; here exactly the shared width is removed.

        Removal is by character count, not by content, so a line indented with
        different whitespace characters than the shortest line still loses the
        same number of characters. StripPredent is the regex-based variant.
    .PARAMETER Content
        The text to dedent. Lines may be separated by LF or CRLF.
    .OUTPUTS
        System.String. The dedented lines joined with a single LF ("`n").
    .EXAMPLE
        TrimPredent "    a`n      b"
        # -> "a`n  b"
    #>
    param( [string] $Content )

    # Horizontal whitespace only. A plain \s also matches CR/LF, which lets a
    # blank line be swallowed into the next line's indent and inflates the
    # measured width (over-trimming real text when it becomes the minimum).
    $regexPredent = '^[^\S\r\n]*(?=\S)'
    [System.Text.RegularExpressions.RegexOptions] $opts = 'Multiline'

    $found = [Regex]::Matches( $Content, $regexPredent, $opts )

    # Sort + Select-Object -First works on Windows PowerShell 5.1 as well;
    # Sort-Object -Top only exists in PowerShell 6 and later.
    $shortest = $found | Sort-Object -Property Length | Select-Object -First 1

    # No non-blank line at all (empty or whitespace-only input): strip nothing.
    $width = 0
    if ( $null -ne $shortest ) { $width = $shortest.Length }

    $trimmed = foreach ( $line in $Content -split '\r?\n' ) {
        # Clamp the offset: Substring throws when it is past the end of the
        # string, which happens for whitespace-only lines shorter than $width.
        $line.Substring( [Math]::Min( $line.Length, $width ) )
    }

    @( $trimmed ) -join "`n"
}
|
|
function StripPredent {
    <#
    .SYNOPSIS
        Strips the common leading indentation from an indented here-string,
        similar to dedenting in Python or C# raw string literals.
    .DESCRIPTION
        Finds the shortest run of horizontal whitespace that precedes the first
        non-whitespace character of any line, then removes that exact whitespace
        text from the start of every line with a regex replace.

        The prefix is kept as a string and matched as text instead of being
        converted to a numeric offset for Substring. A String/char length is not
        the same thing as a count of characters/runes/codepoints:
          - a .NET char is 2 bytes
          - UTF-16 uses 2 or 4 bytes per codepoint
          - UTF-8 uses 1 to 4 bytes per codepoint

        A line that does not start with exactly the shortest prefix (for
        example tabs where the shortest line used spaces) is left unchanged.
    .PARAMETER Content
        The text to dedent. Lines may be separated by LF or CRLF.
    .OUTPUTS
        System.String. The dedented lines joined with a single LF ("`n").
    .EXAMPLE
        StripPredent "    a`n      b"
        # -> "a`n  b"
    #>
    param( [string] $Content )

    # Horizontal whitespace only. A plain \s also matches CR/LF, so a blank
    # line would be merged into the following line's indentation.
    $regexPredent = '^[^\S\r\n]*(?=\S)'
    [System.Text.RegularExpressions.RegexOptions] $opts = 'Multiline'
    $found = [Regex]::Matches( $Content, $regexPredent, $opts )

    # Numeric alternative: ( $found | Measure-Object -Minimum -Property Length ).Minimum
    # Sorting keeps the matched text itself. Select-Object -First is used
    # because Sort-Object -Top is not available before PowerShell 6.
    $shortest = $found | Sort-Object -Property Length | Select-Object -First 1

    $prefix = ''
    if ( $null -ne $shortest ) { $prefix = $shortest.Value }

    $lines = $Content -split '\r?\n'

    if ( $prefix.Length -gt 0 ) {
        # Escape the prefix so it is always matched literally, and do not
        # require a non-whitespace character right after it: deeper-indented
        # lines must lose the shared prefix too, keeping their extra indent.
        $toReplace = '^' + [Regex]::Escape( $prefix )
        $lines = $lines -replace $toReplace, ''
    }

    @( $lines ) -join "`n"
}
|
|
# h1 'origi'
|
|
# $stuff | fcc
|
|
# h1 'StripPredent'
|
|
# Run both dedent variants on the sample text; each result is written to the output stream.
TrimPredent -Content $longMessage
StripPredent -Content $longMessage
|