Skip to content

Instantly share code, notes, and snippets.

@adamhill
Last active February 2, 2025 14:07
Show Gist options
  • Save adamhill/a794804d6b4574e40c6ef767ed69328c to your computer and use it in GitHub Desktop.
Save adamhill/a794804d6b4574e40c6ef767ed69328c to your computer and use it in GitHub Desktop.
Scripts to Remove Space-Based Steganography
#!/bin/bash
# Invisible / zero-width code points that are stripped from the text.
#
# Each entry is spelled as its explicit UTF-8 byte sequence rather than with
# $'\uXXXX': the \u escape needs bash >= 4.2 *and* a UTF-8 locale, otherwise
# it expands to the literal text "\u200B", which would make the removal
# pattern delete ordinary characters (backslash, "u", digits, hex letters).
INVISIBLE_CHARS=(
  $'\xE2\x80\x8B' # U+200B Zero-width space
  $'\xE2\x80\x8C' # U+200C Zero-width non-joiner
  $'\xE2\x80\x8D' # U+200D Zero-width joiner
  $'\xE2\x81\xA0' # U+2060 Word joiner
  $'\xEF\xBB\xBF' # U+FEFF Zero-width non-breaking space
  $'\xE1\xA0\x8E' # U+180E Mongolian vowel separator
  $'\xE2\x80\x8E' # U+200E Left-to-right mark
  $'\xE2\x80\x8F' # U+200F Right-to-left mark
  $'\xE2\x80\xAA' # U+202A Left-to-right embedding
  $'\xE2\x80\xAB' # U+202B Right-to-left embedding
  $'\xE2\x80\xAC' # U+202C Pop directional formatting
  $'\xE2\x80\xAD' # U+202D Left-to-right override
  $'\xE2\x80\xAE' # U+202E Right-to-left override
  $'\xE2\x81\xA1' # U+2061 Function application
  $'\xE2\x81\xA2' # U+2062 Invisible times
  $'\xE2\x81\xA3' # U+2063 Invisible separator
  $'\xE2\x81\xA4' # U+2064 Invisible plus
  $'\xE2\x81\xAA' # U+206A Inhibit symmetric swapping
  $'\xE2\x81\xAB' # U+206B Activate symmetric swapping
  $'\xE2\x81\xAC' # U+206C Inhibit arabic form shaping
  $'\xE2\x81\xAD' # U+206D Activate arabic form shaping
  $'\xE2\x81\xAE' # U+206E National digit shapes
  $'\xE2\x81\xAF' # U+206F Nominal digit shapes
  $'\xE3\x85\xA4' # U+3164 Hangul filler
  $'\xE1\x85\xA0' # U+1160 Hangul jamo filler
)
#######################################
# Strip invisible Unicode characters and normalise spacing.
#
# Every character listed in INVISIBLE_CHARS is deleted, runs of ASCII spaces
# are squeezed to one space, and leading/trailing whitespace is removed from
# each line.
#
# Globals:   INVISIBLE_CHARS (read) - characters to delete; entries are used
#            verbatim inside a sed pattern, so they must not contain "/" or
#            regex metacharacters
# Arguments: $1 - optional path of the file to clean; stdin is read when the
#                 argument is absent or empty
# Outputs:   cleaned text on stdout
# Returns:   non-zero if the input file cannot be read, otherwise the exit
#            status of the final sed stage
#######################################
clean_spaces() {
  local input_text char
  local -a strip_args=()

  # One literal substitution per character instead of a single [...] bracket
  # expression: in a non-UTF-8 locale a bracket expression matches individual
  # bytes and would corrupt unrelated multi-byte characters that merely share
  # a byte with one of the invisible characters.
  for char in "${INVISIBLE_CHARS[@]}"; do
    strip_args+=(-e "s/${char}//g")
  done

  # Command substitution drops trailing newlines; sed re-adds exactly one.
  if [[ -n "${1:-}" ]]; then
    input_text=$(cat -- "$1") || return
  else
    input_text=$(cat)
  fi

  # printf rather than echo: echo would treat input such as "-n" or "-e" as
  # an option and print nothing.
  printf '%s\n' "$input_text" \
    | sed "${strip_args[@]}" \
    | tr -s ' ' \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}
# Entry point: forward the first script argument when it is non-empty;
# with no (or an empty) argument clean_spaces reads from stdin instead.
clean_spaces ${1:+"$1"}
# Make executable
chmod +x clean_spaces.sh
# Use with a file
./clean_spaces.sh input.txt
# Use with pipe
echo "Hidden​‌‍⁠᠎‎‏‪‫‬‭‮⁡⁢⁣⁤‪‫‬‭‮ ㅤᄠmessage here" | ./clean_spaces.sh
# Use interactively
./clean_spaces.sh
# Type or paste the text, then press Ctrl+D (end of input) to see the cleaned result
/// <summary>
/// Removes invisible Unicode characters that can be used to hide data in
/// text and normalises runs of ASCII spaces.
/// </summary>
public static class TextCleaner
{
    // Characters that are deleted outright from the input.
    private static readonly char[] InvisibleChars =
    {
        '\u200B', // Zero-width space
        '\u200C', // Zero-width non-joiner
        '\u200D', // Zero-width joiner
        '\u2060', // Word joiner
        '\uFEFF', // Zero-width non-breaking space
        '\u180E', // Mongolian vowel separator
        '\u200E', // Left-to-right mark
        '\u200F', // Right-to-left mark
        '\u202A', // Left-to-right embedding
        '\u202B', // Right-to-left embedding
        '\u202C', // Pop directional formatting
        '\u202D', // Left-to-right override
        '\u202E', // Right-to-left override
        '\u2061', // Function application
        '\u2062', // Invisible times
        '\u2063', // Invisible separator
        '\u2064', // Invisible plus
        '\u206A', // Inhibit symmetric swapping
        '\u206B', // Activate symmetric swapping
        '\u206C', // Inhibit arabic form shaping
        '\u206D', // Activate arabic form shaping
        '\u206E', // National digit shapes
        '\u206F', // Nominal digit shapes
        '\u3164', // Hangul filler
        '\u1160'  // Hangul jamo filler
    };

    // Separator used when collapsing runs of ordinary spaces.
    private static readonly char[] SpaceSeparator = { ' ' };

    /// <summary>
    /// Returns <paramref name="input"/> with every invisible character
    /// removed, consecutive spaces collapsed to one, and surrounding
    /// whitespace trimmed.
    /// </summary>
    /// <param name="input">Text to clean; may be null or empty.</param>
    /// <returns>
    /// The cleaned text. A null or empty <paramref name="input"/> is returned
    /// unchanged.
    /// </returns>
    public static string CleanText(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        // Cutting the text at every invisible character and gluing the pieces
        // back together drops all of them in one pass.
        string[] visibleRuns = input.Split(InvisibleChars);
        string withoutInvisible = string.Concat(visibleRuns);

        // Splitting on ' ' while discarding empty entries, then re-joining
        // with a single space, squeezes every run of spaces.
        string[] words = withoutInvisible.Split(
            SpaceSeparator,
            StringSplitOptions.RemoveEmptyEntries);
        string singleSpaced = string.Join(" ", words);

        // Remove any other whitespace left at either end.
        return singleSpaced.Trim();
    }
}
#!/bin/bash
#######################################
# Strip invisible Unicode characters and normalise spacing, using perl.
#
# Deletes the zero-width / directional / filler code points listed in the
# perl character class below, squeezes runs of ASCII spaces to one space and
# trims leading/trailing whitespace on every line.
#
# Arguments: $1 - optional path of the file to clean; stdin is read when the
#                 argument is absent or empty
# Outputs:   cleaned text on stdout
# Returns:   non-zero if any pipeline stage fails (e.g. unreadable file)
#######################################
clean_stego() {
  # The input is streamed straight into the pipeline, so there is no temp
  # file to leak when the pipeline is interrupted. The subshell keeps
  # `pipefail` from leaking into the caller's shell while still letting a
  # failing `cat` (missing/unreadable file) surface as the return status.
  (
    set -o pipefail
    if [[ -n "${1:-}" ]]; then
      cat -- "$1"
    else
      cat
    fi \
      | perl -CSD -pe 's/[\x{200B}\x{200C}\x{200D}\x{2060}\x{FEFF}\x{180E}\x{200E}\x{200F}\x{202A}-\x{202E}\x{2061}-\x{2064}\x{206A}-\x{206F}\x{3164}\x{1160}]//g' \
      | tr -s ' ' \
      | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
  )
}
# Entry point: clean the file named by the first argument, or stdin when the
# argument is missing or empty.
case "${1-}" in
  '') clean_stego ;;
  *) clean_stego "$1" ;;
esac
function Remove-HiddenSpaces {
    <#
    .SYNOPSIS
    Removes invisible Unicode characters from text and normalises whitespace.

    .DESCRIPTION
    Deletes every character in the invisible-character list, collapses each
    run of whitespace to a single space and trims the result. Text is taken
    from -Text (or the pipeline) unless -Path is given, in which case the
    file is read as a single string.

    .PARAMETER Text
    Text to clean. Accepts pipeline input; every piped string is cleaned and
    emitted separately.

    .PARAMETER Path
    Path of a file to clean. When supplied it takes precedence over -Text.

    .OUTPUTS
    System.String. One cleaned string per input.
    #>
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true)]
        [string]$Text,
        [Parameter()]
        [string]$Path
    )

    begin {
        # Array of invisible Unicode characters (built once, not per input)
        $invisibleChars = @(
            [char]0x200B # Zero-width space
            [char]0x200C # Zero-width non-joiner
            [char]0x200D # Zero-width joiner
            [char]0x2060 # Word joiner
            [char]0xFEFF # Zero-width non-breaking space
            [char]0x180E # Mongolian vowel separator
            [char]0x200E # Left-to-right mark
            [char]0x200F # Right-to-left mark
            [char]0x202A # Left-to-right embedding
            [char]0x202B # Right-to-left embedding
            [char]0x202C # Pop directional formatting
            [char]0x202D # Left-to-right override
            [char]0x202E # Right-to-left override
            [char]0x2061 # Function application
            [char]0x2062 # Invisible times
            [char]0x2063 # Invisible separator
            [char]0x2064 # Invisible plus
            [char]0x206A # Inhibit symmetric swapping
            [char]0x206B # Activate symmetric swapping
            [char]0x206C # Inhibit arabic form shaping
            [char]0x206D # Activate arabic form shaping
            [char]0x206E # National digit shapes
            [char]0x206F # Nominal digit shapes
            [char]0x3164 # Hangul filler
            [char]0x1160 # Hangul jamo filler
        )
    }

    # A process block is required for ValueFromPipeline: without it the body
    # runs only once, after the pipeline ends, and sees just the last item.
    process {
        # Get input text either from pipeline/parameter or file
        if ($Path) {
            $inputText = Get-Content -Path $Path -Raw
        }
        else {
            $inputText = $Text
        }

        # The [string] cast turns $null (e.g. nothing was read from the file)
        # into '' so the method calls below cannot hit a null reference.
        $cleanText = [string]$inputText

        # Remove invisible characters; the explicit [string] cast selects the
        # Replace(string, string) overload unambiguously.
        foreach ($char in $invisibleChars) {
            $cleanText = $cleanText.Replace([string]$char, '')
        }

        # Collapse whitespace and trim. NOTE: \s also matches tabs and line
        # breaks, so multi-line input is flattened onto a single line.
        $cleanText = $cleanText -replace '\s+', ' '
        $cleanText = $cleanText.Trim()

        $cleanText
    }
}
# Export the function - only valid when this file is loaded as a module (.psm1); remove this line if you dot-source or paste the script
Export-ModuleMember -Function Remove-HiddenSpaces
# Clean text directly
$text = "Hidden messages​‌‍⁠ might be here"
Remove-HiddenSpaces -Text $text
# Clean text from a file
Remove-HiddenSpaces -Path "input.txt"
# Use with pipeline
"Hidden messages​‌‍⁠ might be here" | Remove-HiddenSpaces
# Save to a file
$text | Remove-HiddenSpaces | Out-File "cleaned.txt"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment