AI Detector - Detect AI "slop" articles before you waste your time reading them.
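The detector has two parts: the userscript below, which extracts long blocks of text from the page you are reading, and a local Flask server (either of the two Python scripts further down), which scores each block with an AI-text-detection model. The userscript POSTs a JSON array of strings to the server and expects a JSON array of {"label": ..., "score": ...} objects back in the same order, highlighting any block labeled "AI" with a score above 0.90. A minimal sketch of that request/response contract, assuming the server is already running on the default port (the requests package is an assumption here and not part of this gist):

#!/usr/bin/env python3
# Exercise the detector endpoint the same way the userscript does.
import requests

texts = ["A paragraph of at least 250 characters copied from an article ..."]
results = requests.post("http://127.0.0.1:5000/", json=texts).json()
for text, result in zip(texts, results):
    print(result["label"], round(result["score"], 3), text[:60])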
// ==UserScript==
// @name        AI Detector
// @version     3
// @grant       GM.xmlHttpRequest
// @include     *
// @connect     127.0.0.1:5000
// @description Detects AI-generated text on a webpage and highlights it.
// ==/UserScript==

const server = 'http://127.0.0.1:5000';
const existingChildren = new Set();
const cache = new Map();
function getTextContent(el, useExtendedLogic = true) {
    const text = el.innerText ? el.innerText.trim() : el.textContent.trim();
    if (text.length > 1000 && useExtendedLogic) {
        // This is a long text; iterate through it and drop short text nodes
        const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
        const longTextNodes = [];
        while (walker.nextNode()) {
            const node = walker.currentNode;
            if (node.textContent.trim().length >= 250 &&
                node.parentElement.tagName !== "SCRIPT" &&
                node.parentElement.tagName !== "STYLE" &&
                node.parentElement.tagName !== "NOSCRIPT") {
                longTextNodes.push(node);
            }
        }
        if (longTextNodes.length > 0) {
            return longTextNodes.map(node => getTextContent(node, false)).join(' ');
        }
    }
    return text;
}
// Count this node plus all of its descendants (a rough size metric for scoring)
function countLeafNodes(el, cache = new Map()) {
    // Text nodes and childless elements are leaves
    if (el.nodeType === Node.TEXT_NODE || (el.nodeType === Node.ELEMENT_NODE && el.children.length === 0)) {
        return 1;
    }
    // Check if we already computed this element's count
    if (cache.has(el)) {
        return cache.get(el);
    }
    let count = 1;
    if (el.nodeType === Node.ELEMENT_NODE) {
        for (const child of el.childNodes) {
            count += countLeafNodes(child, cache);
        }
    }
    cache.set(el, count);
    return count;
}
function getTextNodes(el) {
    const children = [];
    const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
    while (walker.nextNode()) {
        if (
            walker.currentNode.parentElement.tagName === "SCRIPT" ||
            walker.currentNode.parentElement.tagName === "STYLE" ||
            walker.currentNode.parentElement.tagName === "NOSCRIPT"
        ) {
            continue; // Skip text nodes inside script, style, or noscript elements
        }
        if (getTextContent(walker.currentNode).length < 250) {
            continue;
        }
        if (existingChildren.has(walker.currentNode)) {
            continue;
        }
        existingChildren.add(walker.currentNode);
        children.push(walker.currentNode);
    }
    if (el === document) {
        let additional = Array.from(document.querySelectorAll('ul'));
        additional = additional.concat(Array.from(document.querySelectorAll('ol')));
        additional = additional.concat(Array.from(document.querySelectorAll('p')));
        const existingRawText = new Set(children.map(node => getTextContent(node)));
        for (const el of additional) {
            if (getTextContent(el).length < 250) {
                continue; // Skip elements with less than 250 characters
            }
            if (existingRawText.has(getTextContent(el)) || existingChildren.has(el)) {
                continue; // Skip elements whose text content is already in the list
            }
            // Require at least one reasonably long (>= 50 char) text node, so
            // lists of short links or labels do not qualify
            let has_long_text = false;
            const textWalker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
            while (textWalker.nextNode()) {
                if (textWalker.currentNode.textContent.trim().length >= 50) {
                    has_long_text = true;
                    break; // Stop checking once we found a long text node
                }
            }
            if (!has_long_text) {
                continue; // Skip elements without long text nodes
            }
            existingChildren.add(el);
            children.push(el);
        }
        // Find the parent element shared by the most children (avoiding nav/sidebar elements)
        const parentCounts = new Map();
        // Function to check if an element should be excluded
        function shouldExcludeElement(element) {
            const tagName = element.tagName.toLowerCase();
            if (['nav', 'header', 'footer', 'aside'].includes(tagName)) {
                return true;
            }
            // className can be an SVGAnimatedString on SVG elements, so coerce to a string
            const className = (typeof element.className === 'string' ? element.className : '') || '';
            const id = element.id || '';
            const excludeKeywords = ['nav', 'menu', 'sidebar', 'header', 'footer', 'advertisement', 'ads', 'banner'];
            for (const keyword of excludeKeywords) {
                if (className.toLowerCase().includes(keyword) || id.toLowerCase().includes(keyword)) {
                    return true;
                }
            }
            return false;
        }
        // Count how many of the candidate children each ancestor contains,
        // excluding navigation/sidebar elements
        for (const child of children) {
            // Both text nodes and elements start the walk from their parent element
            let parent = child.parentElement;
            // Walk up the DOM tree to find suitable parent candidates
            while (parent && parent !== document.body) {
                if (!shouldExcludeElement(parent)) {
                    parentCounts.set(parent, (parentCounts.get(parent) || 0) + 1);
                }
                parent = parent.parentElement;
            }
        }
        // Find the parent with the most children that has substantial content
        let mostCommonParent = null;
        let maxScore = 0;
        const documentNodes = countLeafNodes(document.body, cache);
        for (const [parent, count] of parentCounts) {
            // Favor parents containing many candidates but a small share of the document
            const score = count * (1 - (countLeafNodes(parent, cache) / documentNodes));
            if (score > maxScore) {
                maxScore = score;
                mostCommonParent = parent;
            }
        }
        if (mostCommonParent && !existingChildren.has(mostCommonParent)) {
            existingChildren.add(mostCommonParent);
            children.push(mostCommonParent);
        }
    }
    return children;
}
function highlightText() {
    const nodes = getTextNodes(document);
    const nodesAsText = nodes.map(node => getTextContent(node));
    GM.xmlHttpRequest({
        method: 'POST',
        url: server,
        headers: {
            'Content-Type': 'application/json'
        },
        data: JSON.stringify(nodesAsText),
        onload: function (result) {
            const response = JSON.parse(result.responseText);
            if (response && response.length > 0) {
                response.forEach((item, index) => {
                    if (item.label === "AI" && item.score > 0.90) {
                        const target = nodes[index].nodeType === Node.TEXT_NODE
                            ? nodes[index].parentElement
                            : nodes[index];
                        // Prepend to cssText so existing inline styles are preserved
                        target.style.cssText =
                            "background-color: #FF55FF55 !important; " + target.style.cssText;
                    }
                });
            }
        }
    });
}
let queued = true;
setTimeout(() => {
    highlightText();
    queued = false;
}, 1000);

// Rerun when there are significant changes in the DOM
const observer = new MutationObserver((mutations) => {
    mutations.forEach((mutation) => {
        cache.delete(mutation.target);
        for (const node of mutation.removedNodes) {
            if (node.nodeType === Node.ELEMENT_NODE) {
                cache.delete(node);
            }
        }
        if (!queued && mutation.type === 'childList' && mutation.addedNodes.length > 0) {
            queued = true;
            setTimeout(() => {
                highlightText();
                queued = false;
            }, 1000);
        }
    });
});
observer.observe(document.body, {
    childList: true,
    subtree: true
});
#!/usr/bin/env python3
from flask import Flask, jsonify, request
import argparse

# Uses https://huggingface.co/desklib/ai-text-detector-v1.01
# This model is slightly more reliable, but it needs far more VRAM, runs slower,
# and draws much more power.

parser = argparse.ArgumentParser(description='AI text detector')
parser.add_argument("--port", type=int, default=5000, help="Port to listen on.")
parser.add_argument("--ip", default="127.0.0.1", help="IP to listen on.")
args = parser.parse_args()

# Heavy imports after argument parsing so --help stays fast
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModel, PreTrainedModel
class DesklibAIDetectionModel(PreTrainedModel):
    config_class = AutoConfig

    def __init__(self, config):
        super().__init__(config)
        # Initialize the base transformer model.
        self.model = AutoModel.from_config(config)
        # Define a classifier head.
        self.classifier = nn.Linear(config.hidden_size, 1)
        # Initialize weights (handled by PreTrainedModel)
        self.init_weights()

    def forward(self, input_ids, attention_mask=None, labels=None):
        # Forward pass through the transformer
        outputs = self.model(input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs[0]
        # Mean pooling over the non-padding tokens
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, dim=1)
        sum_mask = torch.clamp(input_mask_expanded.sum(dim=1), min=1e-9)
        pooled_output = sum_embeddings / sum_mask
        # Classifier
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits.view(-1), labels.float())

        output = {"logits": logits}
        if loss is not None:
            output["loss"] = loss
        return output
def predict_single_text(text, model, tokenizer, device, max_len=768, threshold=0.5):
    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=max_len,
        return_tensors='pt'
    )
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs["logits"]
        probability = torch.sigmoid(logits).item()

    label = 'AI' if probability >= threshold else 'Human'
    return {"label": label, "score": probability}
# Expects the desklib model and tokenizer files in the working directory
model_directory = "."
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = DesklibAIDetectionModel.from_pretrained(model_directory)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

app = Flask(__name__)

@app.route('/', methods=['POST'])
def index():
    data = request.get_json()
    try:
        return jsonify([predict_single_text(f, model, tokenizer, device) for f in data])
    except Exception:
        return jsonify({"error": "unhandled exception"}), 500

if __name__ == '__main__':
    app.run(host=args.ip, threaded=True, port=args.port)
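Both server scripts load the model from the working directory ("."), so the model files have to be fetched first. A minimal sketch of the one-time download, assuming the huggingface_hub package and the model ID from the comment above:

#!/usr/bin/env python3
# One-time download of the desklib detector files into the current directory.
from huggingface_hub import snapshot_download

snapshot_download(repo_id="desklib/ai-text-detector-v1.01", local_dir=".")

With the files in place, the server starts with the defaults above, e.g. python3 server.py --port 5000 (server.py being whatever name you saved this script under).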
#!/usr/bin/env python3
from flask import Flask, jsonify, request
import argparse

# Uses https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
# This model is fairly accurate and light on VRAM, but not as good as the slower model above.

parser = argparse.ArgumentParser(description='AI text detector')
parser.add_argument("--port", type=int, default=5000, help="Port to listen on.")
parser.add_argument("--ip", default="127.0.0.1", help="IP to listen on.")
args = parser.parse_args()

from transformers import pipeline
from utils import clean_text  # clean_text helper is expected alongside this script

classifier = pipeline(
    "text-classification",
    model=".",
)
app = Flask(__name__)

@app.route('/', methods=['POST'])
def index():
    data = request.get_json()
    try:
        return jsonify(classifier([clean_text(f) for f in data], truncation=True))
    except Exception:
        return jsonify({"error": "unhandled exception"}), 500

if __name__ == '__main__':
    app.run(host=args.ip, threaded=True, port=args.port)
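The same download step applies here, with one extra wrinkle: this script imports clean_text from utils, so a utils.py providing that helper must end up next to the script as well. A sketch under the same huggingface_hub assumption:

#!/usr/bin/env python3
# One-time download of the fakespot detector files into the current directory.
from huggingface_hub import snapshot_download

snapshot_download(repo_id="fakespot-ai/roberta-base-ai-text-detection-v1", local_dir=".")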