Last active
February 2, 2025 14:07
-
-
Save adamhill/a794804d6b4574e40c6ef767ed69328c to your computer and use it in GitHub Desktop.
Scripts to Remove Space-Based Steganography
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Invisible, zero-width and bidi-control code points to strip from text.
#
# Each entry is written as its raw UTF-8 byte sequence (\xHH escapes) instead
# of $'\uXXXX': the \u escape is only understood by bash 4.2 and newer and is
# converted through the current locale, while byte escapes expand to the same
# bytes in every bash version and every locale.
INVISIBLE_CHARS=(
    $'\xE2\x80\x8B'  # U+200B Zero-width space
    $'\xE2\x80\x8C'  # U+200C Zero-width non-joiner
    $'\xE2\x80\x8D'  # U+200D Zero-width joiner
    $'\xE2\x81\xA0'  # U+2060 Word joiner
    $'\xEF\xBB\xBF'  # U+FEFF Zero-width non-breaking space
    $'\xE1\xA0\x8E'  # U+180E Mongolian vowel separator
    $'\xE2\x80\x8E'  # U+200E Left-to-right mark
    $'\xE2\x80\x8F'  # U+200F Right-to-left mark
    $'\xE2\x80\xAA'  # U+202A Left-to-right embedding
    $'\xE2\x80\xAB'  # U+202B Right-to-left embedding
    $'\xE2\x80\xAC'  # U+202C Pop directional formatting
    $'\xE2\x80\xAD'  # U+202D Left-to-right override
    $'\xE2\x80\xAE'  # U+202E Right-to-left override
    $'\xE2\x81\xA1'  # U+2061 Function application
    $'\xE2\x81\xA2'  # U+2062 Invisible times
    $'\xE2\x81\xA3'  # U+2063 Invisible separator
    $'\xE2\x81\xA4'  # U+2064 Invisible plus
    $'\xE2\x81\xAA'  # U+206A Inhibit symmetric swapping
    $'\xE2\x81\xAB'  # U+206B Activate symmetric swapping
    $'\xE2\x81\xAC'  # U+206C Inhibit arabic form shaping
    $'\xE2\x81\xAD'  # U+206D Activate arabic form shaping
    $'\xE2\x81\xAE'  # U+206E National digit shapes
    $'\xE2\x81\xAF'  # U+206F Nominal digit shapes
    $'\xE3\x85\xA4'  # U+3164 Hangul filler
    $'\xE1\x85\xA0'  # U+1160 Hangul jamo filler
)
# clean_spaces [FILE]
#
# Writes a cleaned copy of FILE (or of stdin when FILE is omitted or empty) to
# stdout:
#   1. every character listed in the INVISIBLE_CHARS array is deleted,
#   2. runs of spaces are squeezed to a single space,
#   3. leading and trailing whitespace is trimmed from each line.
# Returns non-zero if FILE cannot be read.
clean_spaces() {
    local char input_text
    local -a strip_args=()

    # One literal substitution per character instead of a single bracket
    # expression: a literal byte sequence matches correctly in any locale,
    # whereas a multibyte bracket expression in a non-UTF-8 locale is treated
    # as a set of single bytes and can damage unrelated UTF-8 text.
    for char in "${INVISIBLE_CHARS[@]}"; do
        strip_args+=(-e "s/$char//g")
    done
    # With nothing to strip, give sed an empty script so it just passes text through.
    [ "${#strip_args[@]}" -gt 0 ] || strip_args=(-e '')

    # If a file is provided, read from it; otherwise read from stdin.
    if [ "$1" ]; then
        input_text=$(cat -- "$1") || return
    else
        input_text=$(cat)
    fi

    # printf rather than echo: echo would interpret input such as "-n" or "-e"
    # as an option and print nothing.
    printf '%s\n' "$input_text" |
        sed "${strip_args[@]}" |
        tr -s ' ' |
        sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}
# Forward the first argument only when it is non-empty; with no (or an empty)
# argument clean_spaces is invoked bare and reads stdin.
clean_spaces ${1:+"$1"}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make the script executable
chmod +x clean_spaces.sh
# Clean the contents of a file
./clean_spaces.sh input.txt
# Clean text arriving on a pipe
echo "Hidden ㅤᄠmessage here" | ./clean_spaces.sh
# Use interactively: run with no argument,
./clean_spaces.sh
# then type text and press Ctrl+D when done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Strips invisible Unicode characters from text and normalizes its spacing.
/// </summary>
public static class TextCleaner
{
    // Characters removed by CleanText: zero-width characters, bidi controls,
    // invisible math operators, deprecated format characters and Hangul fillers.
    private static readonly char[] InvisibleSpaces =
    {
        '\u200B', // Zero-width space
        '\u200C', // Zero-width non-joiner
        '\u200D', // Zero-width joiner
        '\u2060', // Word joiner
        '\uFEFF', // Zero-width non-breaking space
        '\u180E', // Mongolian vowel separator
        '\u200E', // Left-to-right mark
        '\u200F', // Right-to-left mark
        '\u202A', // Left-to-right embedding
        '\u202B', // Right-to-left embedding
        '\u202C', // Pop directional formatting
        '\u202D', // Left-to-right override
        '\u202E', // Right-to-left override
        '\u2061', // Function application
        '\u2062', // Invisible times
        '\u2063', // Invisible separator
        '\u2064', // Invisible plus
        '\u206A', // Inhibit symmetric swapping
        '\u206B', // Activate symmetric swapping
        '\u206C', // Inhibit arabic form shaping
        '\u206D', // Activate arabic form shaping
        '\u206E', // National digit shapes
        '\u206F', // Nominal digit shapes
        '\u3164', // Hangul filler
        '\u1160'  // Hangul jamo filler
    };

    /// <summary>
    /// Removes every character listed in <c>InvisibleSpaces</c>, collapses runs of
    /// the space character (U+0020) into a single space, and trims leading and
    /// trailing whitespace.
    /// </summary>
    /// <param name="input">The text to clean; may be null or empty.</param>
    /// <returns>
    /// The cleaned text. A null or empty <paramref name="input"/> is returned unchanged.
    /// </returns>
    public static string CleanText(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        // Pass 1: keep only the characters that are not on the strip list.
        char[] kept = new char[input.Length];
        int keptCount = 0;
        foreach (char c in input)
        {
            if (Array.IndexOf(InvisibleSpaces, c) < 0)
            {
                kept[keptCount++] = c;
            }
        }

        string visible = new string(kept, 0, keptCount);

        // Pass 2: splitting on ' ' and dropping the empty pieces collapses each
        // run of spaces; other whitespace (tabs, newlines) stays inside the pieces.
        string[] pieces = visible.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // Pass 3: trim whatever whitespace is left at either end.
        return string.Join(" ", pieces).Trim();
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
clean_stego() { | |
temp_file=$(mktemp) | |
if [ "$1" ]; then | |
cat "$1" > "$temp_file" | |
else | |
cat > "$temp_file" | |
fi | |
cat "$temp_file" | \ | |
perl -CSD -pe 's/[\x{200B}\x{200C}\x{200D}\x{2060}\x{FEFF}\x{180E}\x{200E}\x{200F}\x{202A}-\x{202E}\x{2061}-\x{2064}\x{206A}-\x{206F}\x{3164}\x{1160}]//g' | \ | |
tr -s ' ' | \ | |
sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | |
rm "$temp_file" | |
} | |
# Forward the first argument only when it is non-empty; with no (or an empty)
# argument clean_stego is invoked bare and reads stdin.
clean_stego ${1:+"$1"}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function Remove-HiddenSpaces {
    <#
    .SYNOPSIS
    Removes invisible Unicode characters from text and normalizes spacing.

    .DESCRIPTION
    Deletes zero-width characters, bidi controls, invisible math operators,
    deprecated format characters and Hangul fillers, collapses runs of spaces
    into a single space, and trims leading/trailing whitespace. Line breaks
    inside the text are preserved.

    .PARAMETER Text
    The text to clean. Accepts pipeline input; every piped string is cleaned
    and emitted separately.

    .PARAMETER Path
    Path of a file whose whole content is read and cleaned. Takes precedence
    over -Text when both are supplied.

    .OUTPUTS
    System.String. The cleaned text (an empty string for empty input).
    #>
    [CmdletBinding()]
    param (
        [Parameter(ValueFromPipeline = $true)]
        [string]$Text,

        [Parameter()]
        [string]$Path
    )

    begin {
        # Array of invisible Unicode characters
        $invisibleChars = @(
            [char]0x200B  # Zero-width space
            [char]0x200C  # Zero-width non-joiner
            [char]0x200D  # Zero-width joiner
            [char]0x2060  # Word joiner
            [char]0xFEFF  # Zero-width non-breaking space
            [char]0x180E  # Mongolian vowel separator
            [char]0x200E  # Left-to-right mark
            [char]0x200F  # Right-to-left mark
            [char]0x202A  # Left-to-right embedding
            [char]0x202B  # Right-to-left embedding
            [char]0x202C  # Pop directional formatting
            [char]0x202D  # Left-to-right override
            [char]0x202E  # Right-to-left override
            [char]0x2061  # Function application
            [char]0x2062  # Invisible times
            [char]0x2063  # Invisible separator
            [char]0x2064  # Invisible plus
            [char]0x206A  # Inhibit symmetric swapping
            [char]0x206B  # Activate symmetric swapping
            [char]0x206C  # Inhibit arabic form shaping
            [char]0x206D  # Activate arabic form shaping
            [char]0x206E  # National digit shapes
            [char]0x206F  # Nominal digit shapes
            [char]0x3164  # Hangul filler
            [char]0x1160  # Hangul jamo filler
        )
    }

    # A process block is required for ValueFromPipeline to see every piped
    # item; without it only the last item would be bound and cleaned.
    process {
        # Get input text either from the file or from the pipeline/parameter
        if ($Path) {
            $inputText = Get-Content -Path $Path -Raw
        }
        else {
            $inputText = $Text
        }

        # Get-Content -Raw yields $null for an empty file; nothing to clean.
        if ([string]::IsNullOrEmpty($inputText)) {
            return ''
        }

        # Remove invisible characters
        $cleanText = $inputText
        foreach ($char in $invisibleChars) {
            $cleanText = $cleanText.Replace([string]$char, '')
        }

        # Collapse runs of spaces only (not \s+, which would also flatten
        # newlines and tabs), then trim the ends.
        $cleanText = $cleanText -replace ' {2,}', ' '
        return $cleanText.Trim()
    }
}
# Export the function when this file is loaded as a module. Export-ModuleMember
# raises an error outside module scope, so it is skipped when the file is
# dot-sourced or run as a plain script.
if ($ExecutionContext.SessionState.Module) {
    Export-ModuleMember -Function Remove-HiddenSpaces
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean text held in a variable
$text = "Hidden messages might be here"
Remove-HiddenSpaces -Text $text
# Clean the contents of a file
Remove-HiddenSpaces -Path "input.txt"
# Clean text arriving on the pipeline
"Hidden messages might be here" | Remove-HiddenSpaces
# Save the cleaned text to a file
$text | Remove-HiddenSpaces | Out-File "cleaned.txt"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment