AI Detector - Detect AI "slop" articles before you waste your time reading them.
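The detector has two parts: the userscript below, which extracts long blocks of text from the page you are reading, and a local Flask server (either of the two Python scripts further down), which scores each block with an AI-text-detection model. The userscript POSTs a JSON array of strings to the server and expects a JSON array of {"label": ..., "score": ...} objects back in the same order, highlighting any block labeled "AI" with a score above 0.90. A minimal sketch of that request/response contract, assuming the server is already running on the default port (the requests package is an assumption here and not part of this gist):

#!/usr/bin/env python3
# Exercise the detector endpoint the same way the userscript does.
import requests

texts = ["A paragraph of at least 250 characters copied from an article ..."]
results = requests.post("http://127.0.0.1:5000/", json=texts).json()
for text, result in zip(texts, results):
    print(result["label"], round(result["score"], 3), text[:60])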
// ==UserScript==
// @name        AI Detector
// @version     3
// @grant       GM.xmlHttpRequest
// @include     *
// @connect     127.0.0.1:5000
// @description Detects AI-generated text on a webpage and highlights it.
// ==/UserScript==

const server = 'http://127.0.0.1:5000';
const existingChildren = new Set();
const cache = new Map();
function getTextContent(el, useExtendedLogic = true) {
    const text = el.innerText ? el.innerText.trim() : el.textContent.trim();
    if (text.length > 1000 && useExtendedLogic) {
        // This is a long text; iterate through it and drop short text nodes
        const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
        const longTextNodes = [];
        while (walker.nextNode()) {
            const node = walker.currentNode;
            if (node.textContent.trim().length >= 250 &&
                node.parentElement.tagName !== "SCRIPT" &&
                node.parentElement.tagName !== "STYLE" &&
                node.parentElement.tagName !== "NOSCRIPT") {
                longTextNodes.push(node);
            }
        }
        if (longTextNodes.length > 0) {
            return longTextNodes.map(node => getTextContent(node, false)).join(' ');
        }
    }
    return text;
}
// Count this node plus all of its descendants (a rough size metric for scoring)
function countLeafNodes(el, cache = new Map()) {
    // Text nodes and childless elements are leaves
    if (el.nodeType === Node.TEXT_NODE || (el.nodeType === Node.ELEMENT_NODE && el.children.length === 0)) {
        return 1;
    }
    // Check if we already computed this element's count
    if (cache.has(el)) {
        return cache.get(el);
    }
    let count = 1;
    if (el.nodeType === Node.ELEMENT_NODE) {
        for (const child of el.childNodes) {
            count += countLeafNodes(child, cache);
        }
    }
    cache.set(el, count);
    return count;
}
function getTextNodes(el) {
    const children = [];
    const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
    while (walker.nextNode()) {
        if (
            walker.currentNode.parentElement.tagName === "SCRIPT" ||
            walker.currentNode.parentElement.tagName === "STYLE" ||
            walker.currentNode.parentElement.tagName === "NOSCRIPT"
        ) {
            continue; // Skip text nodes inside script, style, or noscript elements
        }
        if (getTextContent(walker.currentNode).length < 250) {
            continue;
        }
        if (existingChildren.has(walker.currentNode)) {
            continue;
        }
        existingChildren.add(walker.currentNode);
        children.push(walker.currentNode);
    }
    if (el === document) {
        let additional = Array.from(document.querySelectorAll('ul'));
        additional = additional.concat(Array.from(document.querySelectorAll('ol')));
        additional = additional.concat(Array.from(document.querySelectorAll('p')));
        const existingRawText = new Set(children.map(node => getTextContent(node)));
        for (const el of additional) {
            if (getTextContent(el).length < 250) {
                continue; // Skip elements with less than 250 characters
            }
            if (existingRawText.has(getTextContent(el)) || existingChildren.has(el)) {
                continue; // Skip elements whose text content is already in the list
            }
            // Require at least one reasonably long (>= 50 char) text node, so
            // lists of short links or labels do not qualify
            let has_long_text = false;
            const textWalker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT);
            while (textWalker.nextNode()) {
                if (textWalker.currentNode.textContent.trim().length >= 50) {
                    has_long_text = true;
                    break; // Stop checking once we found a long text node
                }
            }
            if (!has_long_text) {
                continue; // Skip elements without long text nodes
            }
            existingChildren.add(el);
            children.push(el);
        }
        // Find the parent element shared by the most children (avoiding nav/sidebar elements)
        const parentCounts = new Map();
        // Function to check if an element should be excluded
        function shouldExcludeElement(element) {
            const tagName = element.tagName.toLowerCase();
            if (['nav', 'header', 'footer', 'aside'].includes(tagName)) {
                return true;
            }
            // className can be an SVGAnimatedString on SVG elements, so coerce to a string
            const className = (typeof element.className === 'string' ? element.className : '') || '';
            const id = element.id || '';
            const excludeKeywords = ['nav', 'menu', 'sidebar', 'header', 'footer', 'advertisement', 'ads', 'banner'];
            for (const keyword of excludeKeywords) {
                if (className.toLowerCase().includes(keyword) || id.toLowerCase().includes(keyword)) {
                    return true;
                }
            }
            return false;
        }
        // Count how many of the candidate children each ancestor contains,
        // excluding navigation/sidebar elements
        for (const child of children) {
            // Both text nodes and elements start the walk from their parent element
            let parent = child.parentElement;
            // Walk up the DOM tree to find suitable parent candidates
            while (parent && parent !== document.body) {
                if (!shouldExcludeElement(parent)) {
                    parentCounts.set(parent, (parentCounts.get(parent) || 0) + 1);
                }
                parent = parent.parentElement;
            }
        }
        // Find the parent with the most children that has substantial content
        let mostCommonParent = null;
        let maxScore = 0;
        const documentNodes = countLeafNodes(document.body, cache);
        for (const [parent, count] of parentCounts) {
            // Favor parents containing many candidates but a small share of the document
            const score = count * (1 - (countLeafNodes(parent, cache) / documentNodes));
            if (score > maxScore) {
                maxScore = score;
                mostCommonParent = parent;
            }
        }
        if (mostCommonParent && !existingChildren.has(mostCommonParent)) {
            existingChildren.add(mostCommonParent);
            children.push(mostCommonParent);
        }
    }
    return children;
}
function highlightText() {
    const nodes = getTextNodes(document);
    const nodesAsText = nodes.map(node => getTextContent(node));
    GM.xmlHttpRequest({
        method: 'POST',
        url: server,
        headers: {
            'Content-Type': 'application/json'
        },
        data: JSON.stringify(nodesAsText),
        onload: function (result) {
            const response = JSON.parse(result.responseText);
            if (response && response.length > 0) {
                response.forEach((item, index) => {
                    if (item.label === "AI" && item.score > 0.90) {
                        const target = nodes[index].nodeType === Node.TEXT_NODE
                            ? nodes[index].parentElement
                            : nodes[index];
                        // Prepend to cssText so existing inline styles are preserved
                        target.style.cssText =
                            "background-color: #FF55FF55 !important; " + target.style.cssText;
                    }
                });
            }
        }
    });
}
let queued = true;
setTimeout(() => {
    highlightText();
    queued = false;
}, 1000);

// Rerun when there are significant changes in the DOM
const observer = new MutationObserver((mutations) => {
    mutations.forEach((mutation) => {
        cache.delete(mutation.target);
        for (const node of mutation.removedNodes) {
            if (node.nodeType === Node.ELEMENT_NODE) {
                cache.delete(node);
            }
        }
        if (!queued && mutation.type === 'childList' && mutation.addedNodes.length > 0) {
            queued = true;
            setTimeout(() => {
                highlightText();
                queued = false;
            }, 1000);
        }
    });
});
observer.observe(document.body, {
    childList: true,
    subtree: true
});
#!/usr/bin/env python3
from flask import Flask, jsonify, request
import argparse

# Uses https://huggingface.co/desklib/ai-text-detector-v1.01
# This model is slightly more reliable, but it needs far more VRAM, runs slower,
# and draws much more power.

parser = argparse.ArgumentParser(description='AI text detector')
parser.add_argument("--port", type=int, default=5000, help="Port to listen on.")
parser.add_argument("--ip", default="127.0.0.1", help="IP to listen on.")
args = parser.parse_args()

# Heavy imports after argument parsing so --help stays fast
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModel, PreTrainedModel
class DesklibAIDetectionModel(PreTrainedModel):
    config_class = AutoConfig

    def __init__(self, config):
        super().__init__(config)
        # Initialize the base transformer model.
        self.model = AutoModel.from_config(config)
        # Define a classifier head.
        self.classifier = nn.Linear(config.hidden_size, 1)
        # Initialize weights (handled by PreTrainedModel)
        self.init_weights()

    def forward(self, input_ids, attention_mask=None, labels=None):
        # Forward pass through the transformer
        outputs = self.model(input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs[0]
        # Mean pooling over the non-padding tokens
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, dim=1)
        sum_mask = torch.clamp(input_mask_expanded.sum(dim=1), min=1e-9)
        pooled_output = sum_embeddings / sum_mask
        # Classifier
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits.view(-1), labels.float())

        output = {"logits": logits}
        if loss is not None:
            output["loss"] = loss
        return output
def predict_single_text(text, model, tokenizer, device, max_len=768, threshold=0.5):
    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=max_len,
        return_tensors='pt'
    )
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs["logits"]
        probability = torch.sigmoid(logits).item()

    label = 'AI' if probability >= threshold else 'Human'
    return {"label": label, "score": probability}
# Expects the desklib model and tokenizer files in the working directory
model_directory = "."
tokenizer = AutoTokenizer.from_pretrained(model_directory)
model = DesklibAIDetectionModel.from_pretrained(model_directory)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

app = Flask(__name__)

@app.route('/', methods=['POST'])
def index():
    data = request.get_json()
    try:
        return jsonify([predict_single_text(f, model, tokenizer, device) for f in data])
    except Exception:
        return jsonify({"error": "unhandled exception"}), 500

if __name__ == '__main__':
    app.run(host=args.ip, threaded=True, port=args.port)
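Both server scripts load the model from the working directory ("."), so the model files have to be fetched first. A minimal sketch of the one-time download, assuming the huggingface_hub package and the model ID from the comment above:

#!/usr/bin/env python3
# One-time download of the desklib detector files into the current directory.
from huggingface_hub import snapshot_download

snapshot_download(repo_id="desklib/ai-text-detector-v1.01", local_dir=".")

With the files in place, the server starts with the defaults above, e.g. python3 server.py --port 5000 (server.py being whatever name you saved this script under).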
#!/usr/bin/env python3
from flask import Flask, jsonify, request
import argparse

# Uses https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
# This model is fairly accurate and light on VRAM, but not as good as the slower model above.

parser = argparse.ArgumentParser(description='AI text detector')
parser.add_argument("--port", type=int, default=5000, help="Port to listen on.")
parser.add_argument("--ip", default="127.0.0.1", help="IP to listen on.")
args = parser.parse_args()

from transformers import pipeline
from utils import clean_text  # clean_text helper is expected alongside this script

classifier = pipeline(
    "text-classification",
    model=".",
)
app = Flask(__name__)

@app.route('/', methods=['POST'])
def index():
    data = request.get_json()
    try:
        return jsonify(classifier([clean_text(f) for f in data], truncation=True))
    except Exception:
        return jsonify({"error": "unhandled exception"}), 500

if __name__ == '__main__':
    app.run(host=args.ip, threaded=True, port=args.port)
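The same download step applies here, with one extra wrinkle: this script imports clean_text from utils, so a utils.py providing that helper must end up next to the script as well. A sketch under the same huggingface_hub assumption:

#!/usr/bin/env python3
# One-time download of the fakespot detector files into the current directory.
from huggingface_hub import snapshot_download

snapshot_download(repo_id="fakespot-ai/roberta-base-ai-text-detection-v1", local_dir=".")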