Skip to content

Instantly share code, notes, and snippets.

@iwalton3
Last active June 26, 2025 00:12
Show Gist options
  • Save iwalton3/d98b779fdeb19e4782f469a672b05cc0 to your computer and use it in GitHub Desktop.
Save iwalton3/d98b779fdeb19e4782f469a672b05cc0 to your computer and use it in GitHub Desktop.
AI Detector - Detect AI "slop" articles before you waste your time reading them.
// ==UserScript==
// @name AI Detector
// @version 3
// @grant GM.xmlHttpRequest
// @include *
// @connect 127.0.0.1:5000
// @description Detects AI-generated text on a webpage and highlights it.
// ==/UserScript==
// Base URL of the local detection server that receives the text to classify.
const server = 'http://127.0.0.1:5000';
// DOM nodes that have already been submitted for classification. Held weakly
// so nodes the page later discards can still be garbage-collected.
const existingChildren = new WeakSet();
// Memoized countLeafNodes() results keyed by DOM node. Held weakly for the
// same reason: a strong Map would pin every detached subtree in memory.
const cache = new WeakMap();
/**
 * Returns the trimmed text of a node.
 *
 * `innerText` is preferred when it is non-empty; otherwise `textContent` is
 * used (text nodes have no `innerText`). When the text is longer than 1000
 * characters and `useExtendedLogic` is enabled, the result is instead built
 * from the descendant text nodes that hold at least 250 trimmed characters and
 * do not live inside SCRIPT/STYLE/NOSCRIPT, joined with single spaces. If no
 * descendant qualifies, the full trimmed text is returned.
 *
 * @param {Node} el - Node to read text from.
 * @param {boolean} [useExtendedLogic=true] - Enables the long-text filtering.
 * @returns {string} The extracted text.
 */
function getTextContent(el, useExtendedLogic = true) {
  const rendered = el.innerText;
  const text = (rendered ? rendered : el.textContent).trim();

  // Short text (or extended logic disabled): nothing to filter.
  if (!useExtendedLogic || text.length <= 1000) {
    return text;
  }

  // Long text: keep only the substantial text nodes, dropping short fragments.
  const ignoredParents = ['SCRIPT', 'STYLE', 'NOSCRIPT'];
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
  const pieces = [];
  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    if (node.textContent.trim().length < 250) {
      continue;
    }
    if (ignoredParents.includes(node.parentElement.tagName)) {
      continue;
    }
    pieces.push(getTextContent(node, false));
  }

  return pieces.length > 0 ? pieces.join(' ') : text;
}
/**
 * Computes a size measure for the subtree rooted at `el`.
 *
 * Text nodes and elements without element children count as 1. Any other node
 * counts as 1 plus, for elements, the sum of the counts of all its child
 * nodes — so despite the name, container elements themselves are included in
 * the total. Results for non-trivial nodes are memoized in `cache`.
 *
 * @param {Node} el - Root of the subtree to measure.
 * @param {Map<Node, number>} [cache] - Memo table shared across calls.
 * @returns {number} The computed count (always at least 1).
 */
function countLeafNodes(el, cache = new Map()) {
  const isText = el.nodeType === Node.TEXT_NODE;
  const isElement = el.nodeType === Node.ELEMENT_NODE;

  // Trivial nodes are never cached; they are cheap to recompute.
  if (isText || (isElement && el.children.length === 0)) {
    return 1;
  }

  if (cache.has(el)) {
    return cache.get(el);
  }

  let total = 1;
  if (isElement) {
    total = Array.from(el.childNodes).reduce(
      (sum, child) => sum + countLeafNodes(child, cache),
      total,
    );
  }

  cache.set(el, total);
  return total;
}
// Text nodes whose parent is one of these tags are never page content.
const IGNORED_TEXT_PARENT_TAGS = ['SCRIPT', 'STYLE', 'NOSCRIPT'];
// Minimum trimmed length for a text node or block element to be classified.
const MIN_BLOCK_TEXT_LENGTH = 250;
// A block element must contain at least one text node this long to qualify.
const MIN_FRAGMENT_TEXT_LENGTH = 50;
// Containers that are treated as page chrome rather than article content.
const EXCLUDED_CONTAINER_TAGS = ['nav', 'header', 'footer', 'aside'];
const EXCLUDED_CONTAINER_KEYWORDS = ['nav', 'menu', 'sidebar', 'header', 'footer', 'advertisement', 'ads', 'banner'];

/** Tells whether a text node sits directly inside a script/style/noscript element. */
function isIgnoredTextNode(node) {
  const parent = node.parentElement;
  return Boolean(parent) && IGNORED_TEXT_PARENT_TAGS.includes(parent.tagName);
}

/** Tells whether an element looks like navigation, chrome, or advertising. */
function shouldExcludeElement(element) {
  if (EXCLUDED_CONTAINER_TAGS.includes(element.tagName.toLowerCase())) {
    return true;
  }
  // SVG elements expose className as an SVGAnimatedString, not a string;
  // calling toLowerCase() on it directly would throw.
  const rawClass = element.className;
  let className = '';
  if (typeof rawClass === 'string') {
    className = rawClass;
  } else if (rawClass && typeof rawClass.baseVal === 'string') {
    className = rawClass.baseVal;
  }
  // No keyword contains a space, so a match cannot straddle the separator.
  const haystack = `${className} ${element.id || ''}`.toLowerCase();
  return EXCLUDED_CONTAINER_KEYWORDS.some((keyword) => haystack.includes(keyword));
}

/**
 * Tells whether a block element holds a text node of at least
 * MIN_FRAGMENT_TEXT_LENGTH characters and is not merely a wrapper around a
 * single text node that has already been submitted on its own.
 */
function hasUncoveredText(element, elementText) {
  const walker = document.createTreeWalker(element, NodeFilter.SHOW_TEXT);
  let foundFragment = false;
  while (walker.nextNode()) {
    const node = walker.currentNode;
    if (isIgnoredTextNode(node)) {
      continue;
    }
    const fragment = node.textContent.trim();
    if (existingChildren.has(node) && fragment === elementText) {
      return false;
    }
    if (fragment.length >= MIN_FRAGMENT_TEXT_LENGTH) {
      foundFragment = true;
    }
  }
  return foundFragment;
}

/**
 * Picks the ancestor (below document.body, not excluded) that contains the
 * most collected nodes, weighted against how much of the page it spans.
 */
function findDominantParent(children) {
  const parentCounts = new Map();
  for (const child of children) {
    let parent = child.parentElement;
    while (parent && parent !== document.body) {
      if (!shouldExcludeElement(parent)) {
        parentCounts.set(parent, (parentCounts.get(parent) || 0) + 1);
      }
      parent = parent.parentElement;
    }
  }

  const documentNodes = countLeafNodes(document.body, cache);
  let bestParent = null;
  let bestScore = 0;
  for (const [parent, count] of parentCounts) {
    // Many contained nodes raise the score; spanning most of the page lowers it.
    const score = count * (1 - ((countLeafNodes(parent, cache) || 1) / documentNodes));
    if (score > bestScore) {
      // Track the score itself: storing `count` here would compare later
      // candidates against the wrong quantity.
      bestScore = score;
      bestParent = parent;
    }
  }
  return bestParent;
}

/**
 * Collects the nodes under `el` whose text should be sent for classification.
 *
 * Every text node with at least 250 trimmed characters (outside
 * script/style/noscript) that has not been collected before is returned and
 * recorded in `existingChildren`. When `el` is the document, the result also
 * includes qualifying `ul`/`ol`/`p` elements and the single ancestor that best
 * groups the collected nodes.
 *
 * @param {Node} el - Root to search; pass `document` for the full-page pass.
 * @returns {Node[]} Newly collected text nodes and elements.
 */
function getTextNodes(el) {
  const children = [];
  const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
  while (walker.nextNode()) {
    const node = walker.currentNode;
    if (isIgnoredTextNode(node)) {
      continue;
    }
    if (getTextContent(node).length < MIN_BLOCK_TEXT_LENGTH) {
      continue;
    }
    if (existingChildren.has(node)) {
      continue;
    }
    existingChildren.add(node);
    children.push(node);
  }

  if (el !== document) {
    return children;
  }

  // Block elements whose text is split across several short text nodes
  // (links, inline formatting) are missed by the pass above; add them here.
  const candidates = [
    ...document.querySelectorAll('ul'),
    ...document.querySelectorAll('ol'),
    ...document.querySelectorAll('p'),
  ];
  const existingRawText = new Set(children.map((node) => getTextContent(node)));
  for (const candidate of candidates) {
    const candidateText = getTextContent(candidate);
    if (candidateText.length < MIN_BLOCK_TEXT_LENGTH) {
      continue;
    }
    if (existingRawText.has(candidateText) || existingChildren.has(candidate)) {
      continue; // Same text (or same element) is already in the list.
    }
    // The original check recursed into getTextNodes(), which can only return
    // text nodes that were not collected yet; all long ones already were, so
    // the check never passed. Inspect the element's text nodes directly.
    if (!hasUncoveredText(candidate, candidateText)) {
      continue;
    }
    existingChildren.add(candidate);
    children.push(candidate);
  }

  const mostCommonParent = findDominantParent(children);
  if (mostCommonParent && !existingChildren.has(mostCommonParent)) {
    existingChildren.add(mostCommonParent);
    children.push(mostCommonParent);
  }
  return children;
}
/**
 * Sends the text of all newly discovered page nodes to the detection server
 * and highlights those classified as AI-generated.
 *
 * The server is expected to answer with a JSON array holding one
 * `{ label, score }` entry per submitted text, in the same order. Entries
 * labelled "AI" with a score above 0.90 get a translucent magenta background.
 * Any other response shape (for example an `{ error: ... }` object) is ignored.
 */
function highlightText() {
  const nodes = getTextNodes(document);
  if (nodes.length === 0) {
    return; // Nothing new to classify; skip the round trip.
  }
  const nodesAsText = nodes.map((node) => getTextContent(node));

  GM.xmlHttpRequest({
    method: 'POST',
    url: server,
    headers: {
      'Content-Type': 'application/json',
    },
    data: JSON.stringify(nodesAsText),
    onload(result) {
      let response;
      try {
        response = JSON.parse(result.responseText);
      } catch (err) {
        console.warn('AI Detector: could not parse server response', err);
        return;
      }
      if (!Array.isArray(response)) {
        return;
      }
      response.forEach((item, index) => {
        const node = nodes[index];
        if (!node || !item || item.label !== 'AI' || !(item.score > 0.90)) {
          return;
        }
        // Text nodes cannot be styled, so highlight their parent element.
        const target = node.nodeType === Node.TEXT_NODE ? node.parentElement : node;
        if (target && target.style) {
          // setProperty keeps the element's other inline styles intact;
          // concatenating the style object into a string would discard them.
          target.style.setProperty('background-color', '#FF55FF55', 'important');
        }
      });
    },
    onerror(err) {
      console.warn('AI Detector: request to detection server failed', err);
    },
  });
}
// Delay between a trigger (page load or DOM change) and the highlight pass.
const HIGHLIGHT_DELAY_MS = 1000;
// True while a highlight pass is pending; used to coalesce bursts of mutations.
let queued = false;

/** Schedules a single highlight pass and marks it as pending until it has run. */
function scheduleHighlight() {
  queued = true;
  setTimeout(() => {
    try {
      highlightText();
    } finally {
      // Always release the flag; otherwise one exception would permanently
      // stop every future pass.
      queued = false;
    }
  }, HIGHLIGHT_DELAY_MS);
}

/** Drops the memoized node counts of `node` and all of its ancestors. */
function invalidateLeafCounts(node) {
  // A change below `node` alters the count of every ancestor as well.
  for (let current = node; current; current = current.parentNode) {
    cache.delete(current);
  }
}

// Initial pass shortly after the script starts.
scheduleHighlight();

// rerun when there are significant changes in the DOM
const observer = new MutationObserver((mutations) => {
  let sawAddedNodes = false;
  for (const mutation of mutations) {
    invalidateLeafCounts(mutation.target);
    for (const node of mutation.removedNodes) {
      if (node.nodeType === Node.ELEMENT_NODE) {
        cache.delete(node);
      }
    }
    if (mutation.type === 'childList' && mutation.addedNodes.length > 0) {
      sawAddedNodes = true;
    }
  }
  if (sawAddedNodes && !queued) {
    scheduleHighlight();
  }
});
observer.observe(document.body, {
  childList: true,
  subtree: true,
});
#!/usr/bin/env python3
from flask import Flask, jsonify, request
import argparse
# Uses https://huggingface.co/desklib/ai-text-detector-v1.01
# This one is slightly more reliable but needs way more vram, is slower, and uses WAY more electricity.
# Command-line options for the server. They are parsed at module level, before
# the torch/transformers imports that follow.
parser = argparse.ArgumentParser(description='Deepfake detector')
# TCP port the Flask app listens on (default 5000).
parser.add_argument("--port", type=int, default=5000, help="Port to listen on.")
# Address the Flask app binds to (default loopback only).
parser.add_argument("--ip", default="127.0.0.1", help="IP to listen on.")
args = parser.parse_args()
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModel, PreTrainedModel
class DesklibAIDetectionModel(PreTrainedModel):
    """Transformer encoder with a single-logit classification head.

    The encoder output is mean-pooled over the non-masked tokens and passed
    through a linear layer that produces one logit per input sequence.
    """

    config_class = AutoConfig

    def __init__(self, config):
        """Build the encoder and classifier head from ``config``.

        ``config`` must provide ``hidden_size`` and be accepted by
        ``AutoModel.from_config``.
        """
        super().__init__(config)
        # Initialize the base transformer model.
        self.model = AutoModel.from_config(config)
        # Define a classifier head.
        self.classifier = nn.Linear(config.hidden_size, 1)
        # Initialize weights (handled by PreTrainedModel)
        self.init_weights()

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Run the encoder and classifier.

        Args:
            input_ids: Token id tensor passed to the encoder.
            attention_mask: Optional mask with the same shape as
                ``input_ids``; when omitted, every token is attended to.
            labels: Optional targets; when given, a binary cross-entropy
                loss on the logits is also returned.

        Returns:
            A dict with ``"logits"`` and, if ``labels`` was supplied,
            ``"loss"``.
        """
        # The signature allows attention_mask=None, but the pooling below
        # needs a real tensor; fall back to "all tokens are valid".
        if attention_mask is None:
            attention_mask = torch.ones_like(input_ids)

        # Forward pass through the transformer
        outputs = self.model(input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs[0]

        # Mean pooling: average hidden states over unmasked positions only.
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, dim=1)
        # Clamp so a fully masked sequence cannot cause a division by zero.
        sum_mask = torch.clamp(input_mask_expanded.sum(dim=1), min=1e-9)
        pooled_output = sum_embeddings / sum_mask

        # Classifier
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits.view(-1), labels.float())

        output = {"logits": logits}
        if loss is not None:
            output["loss"] = loss
        return output
def predict_single_text(text, model, tokenizer, device, max_len=768, threshold=0.5):
    """Classify one text as AI- or human-written.

    Args:
        text: The text to classify.
        model: Callable model returning a mapping with a ``"logits"`` entry.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` tensors.
        device: Device the input tensors are moved to.
        max_len: Length the input is padded/truncated to.
        threshold: Probability at or above which the label is ``'AI'``.

    Returns:
        ``{"label": 'AI' | 'Human', "score": <sigmoid probability>}``.
    """
    batch = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=max_len,
        return_tensors='pt'
    )
    ids = batch['input_ids'].to(device)
    mask = batch['attention_mask'].to(device)

    # Inference only: disable dropout and gradient tracking.
    model.eval()
    with torch.no_grad():
        result = model(input_ids=ids, attention_mask=mask)
        score = torch.sigmoid(result["logits"]).item()

    if score >= threshold:
        verdict = 'AI'
    else:
        verdict = 'Human'
    return {"label": verdict, "score": score}
# Tokenizer and model weights are loaded from the current working directory,
# once, when the module is imported.
model_directory = "."
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = DesklibAIDetectionModel.from_pretrained(model_directory)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Flask application that exposes the detector over HTTP.
app = Flask(__name__)
@app.route('/', methods = ['POST'])
def index():
    """Classify every text in the posted JSON array.

    Expects a JSON array of strings and responds with a JSON array of
    ``{"label", "score"}`` objects in the same order. Invalid payloads get a
    400 response and prediction failures a 500 response; both carry a JSON
    object with an ``"error"`` key.
    """
    data = request.get_json()
    if not isinstance(data, list) or not all(isinstance(item, str) for item in data):
        return jsonify({ "error": 'expected a JSON array of strings' }), 400
    try:
        return jsonify([predict_single_text(f, model, tokenizer, device) for f in data])
    except Exception:
        # Log the traceback instead of discarding it, and avoid a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        app.logger.exception("prediction failed")
        return jsonify({ "error": 'unhandled exception' }), 500


if __name__ == '__main__':
    # Serve on the address and port given on the command line.
    app.run(host=args.ip, threaded=True, port=args.port)
#!/usr/bin/env python3
from flask import Flask, jsonify, request
import argparse
# Uses https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
# This one is pretty good and light on vram, but not as good as the slower model.
# Command-line options for the server. They are parsed at module level, before
# the transformers import that follows.
parser = argparse.ArgumentParser(description='Deepfake detector')
# TCP port the Flask app listens on (default 5000).
parser.add_argument("--port", type=int, default=5000, help="Port to listen on.")
# Address the Flask app binds to (default loopback only).
parser.add_argument("--ip", default="127.0.0.1", help="IP to listen on.")
args = parser.parse_args()
from transformers import pipeline
from utils import clean_text
# Text-classification pipeline; the model is loaded from the current working
# directory once, when the module is imported.
classifier = pipeline(
"text-classification",
model=".",
)
# Flask application that exposes the detector over HTTP.
app = Flask(__name__)
@app.route('/', methods = ['POST'])
def index():
    """Classify every text in the posted JSON array.

    Expects a JSON array of strings. Each text is passed through
    ``clean_text`` and the whole batch is handed to the classification
    pipeline, whose result is returned as JSON. Invalid payloads get a 400
    response and classification failures a 500 response; both carry a JSON
    object with an ``"error"`` key.
    """
    data = request.get_json()
    if not isinstance(data, list) or not all(isinstance(item, str) for item in data):
        return jsonify({ "error": 'expected a JSON array of strings' }), 400
    try:
        return jsonify(classifier([clean_text(f) for f in data], truncation=True))
    except Exception:
        # Log the traceback instead of discarding it, and avoid a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        app.logger.exception("classification failed")
        return jsonify({ "error": 'unhandled exception' }), 500


if __name__ == '__main__':
    # Serve on the address and port given on the command line.
    app.run(host=args.ip, threaded=True, port=args.port)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment