Created
February 24, 2025 21:15
-
-
Save ArcHound/df86e646bb779f758a05a133f9f28594 to your computer and use it in GitHub Desktop.
Shannon Entropy Secret Detections
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
from collections import Counter
from math import log, log2
# Shannon-entropy cutoff in bits per character: a word is reported only when
# its entropy is strictly greater than this value.
SECRET_THRESHOLD = 4
# Path of the file the script entry point scans.
FILENAME = "test.py"
def word_entropy(s: str) -> float:
    """Calculate the Shannon entropy of the word *s* in bits per character.

    The entropy is ``sum(p * log2(1 / p))`` over the distinct characters of
    *s*, where ``p`` is the fraction of *s* made up of that character.

    Args:
        s: The word to measure.

    Returns:
        The entropy as a non-negative float; ``0.0`` for an empty word or a
        word made of a single repeated character.
    """
    if not s:
        return 0.0
    length = len(s)
    # Summing p * log2(1 / p) keeps every term non-negative, so a one-symbol
    # word yields 0.0 rather than -0.0. log2 is also more accurate than
    # log(x, 2), which keeps comparisons against a threshold stable.
    return sum(
        (count / length) * log2(length / count)
        for count in Counter(s).values()
    )
def find_secrets_in_text(text: str, secret_threshold: float) -> list:
    """Return the words of *text* whose Shannon entropy exceeds a threshold.

    Args:
        text: The text to scan; it is split into whitespace-separated words.
        secret_threshold: Entropy cutoff; a word is kept only when its
            entropy is strictly greater than this value.

    Returns:
        A list of the matching words, in the order they appear in *text*.
    """
    # str.split() with no argument splits on any run of whitespace (spaces,
    # newlines, tabs, carriage returns) and never yields empty strings, so
    # the entropy is measured on the bare token only.
    return [
        word
        for word in text.split()
        if word_entropy(word) > secret_threshold
    ]
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default file.
    filename = sys.argv[1] if len(sys.argv) > 1 else FILENAME
    # Decode explicitly as UTF-8 instead of the platform default, and replace
    # undecodable bytes so a stray binary byte does not abort the scan.
    with open(filename, "r", encoding="utf-8", errors="replace") as f:
        data = f.read()
    # Print every high-entropy word, one per line.
    for s in find_secrets_in_text(data, SECRET_THRESHOLD):
        print(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment