Last active
June 24, 2025 13:37
-
-
Save meredoth/a95398fdd5561d151bd5fd84d6ea2740 to your computer and use it in GitHub Desktop.
A PowerShell script that takes a Discourse topic URL and prints all posts containing external links. This is useful for identifying spam posts that hide links in subtle places, such as within commas or periods. If a post seems suspicious, run this script with the Discourse topic URL as a parameter to display all external links found in the posts.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#
.SYNOPSIS
Takes a discourse topic url and prints all posts that contain external links. Useful for identifying spam posts that hide links in subtle places, such as within commas or periods.
.DESCRIPTION
The Check-Discourse-Topic-Links script takes a URL and uses Invoke-RestMethod to download the topic page by page ("<url>.json?page=N"). For each post it checks for the presence of a link_counts field. If the field exists, the script checks whether each entry's internal property is set to false. If so, it prints the post number, the username of the post's author, the external link and the link text.
The script stops at the first page that cannot be downloaded or that contains no posts. A warning is written when the very first page fails, or when a later page fails with anything other than HTTP 404.
.PARAMETER discourseTopic
The discourse topic url to check. Note: the url must begin with http:// or https://
.EXAMPLE
.\Check-Discourse-Topic-Links.ps1 https://discussions.unity.com/t/is-it-a-good-idea-to-put-my-game-on-itch-io-first/1657629
#>
[CmdletBinding()]
param
(
    [Parameter(Mandatory=$True)]
    [string]$discourseTopic
)

# Drop trailing slashes so that appending ".json" gives ".../1234.json" instead of ".../1234/.json".
$topicUrl = $discourseTopic.TrimEnd('/')
$pageNumber = 0
Write-Host "`n"

while($True)
{
    $pageNumber++
    $currentPage = $topicUrl + ".json?page=" + $pageNumber

    try
    {
        Write-Verbose "Getting all posts from $discourseTopic page $pageNumber"
        $pageData = (Invoke-RestMethod -UserAgent ([Microsoft.PowerShell.Commands.PSUserAgent]::Chrome) -Method GET -Uri $currentPage)
    }
    catch
    {
        # A failed request is how the loop detects the end of the topic
        # (presumably the server answers 404 for a page past the last one - TODO confirm).
        # Anything else - or a failure on the very first page - is worth telling the user about
        # instead of exiting silently.
        $statusCode = 0
        $responseProperty = $_.Exception.PSObject.Properties['Response']
        if($responseProperty -and $responseProperty.Value)
        {
            $statusCode = [int]$responseProperty.Value.StatusCode
        }

        if($pageNumber -eq 1 -or $statusCode -ne 404)
        {
            Write-Warning "Request for $currentPage failed: $($_.Exception.Message)"
        }

        break
    }

    $allPosts = $pageData.post_stream.posts

    # Guard against looping forever if a page is returned successfully but holds no posts.
    if($null -eq $allPosts -or @($allPosts).Count -eq 0)
    {
        Write-Verbose "No posts found on page $pageNumber, stopping."
        break
    }

    foreach($post in $allPosts)
    {
        Write-Verbose "Checking post: $($post.post_number) by $($post.username) `n"

        if($post.link_counts)
        {
            Write-Verbose "Links found in post number $($post.post_number) `n"

            foreach($link in $post.link_counts)
            {
                Write-Verbose "Link: $link `n"

                if($link.internal -eq $False)
                {
                    Write-Output "External link found in post number: $($post.post_number) by $($post.username)"
                    Write-Output "$($link.url)"

                    # The rendered HTML may carry the URL with "&" written as "&amp;",
                    # so accept either spelling inside the href attribute.
                    $rawLink = [regex]::Escape($link.url)
                    $encodedLink = [regex]::Escape($link.url.Replace('&', '&amp;'))

                    # (?s) lets "." match newlines so anchor text spanning several lines is still captured.
                    $pattern = "(?s)<a\s+[^>]*href=`"(?:$($rawLink)|$($encodedLink))`"[^>]*>(.*?)</a>"

                    if($post.cooked -match $pattern)
                    {
                        $linkText = $matches[1]
                        Write-Output "Link Text: $linkText `n"
                    }
                    else
                    {
                        Write-Output "No link text found `n"
                    }

                    Write-Verbose "Post body: $($post.cooked) `n"
                }
            }
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A C# script with the same functionality as the PowerShell script above
using System.Text.Json; | |
using System.Text.RegularExpressions; | |
// Entry point: expects exactly one argument, the Discourse topic URL.
// Downloads the topic page by page ("<url>.json?page=N") and hands every entry of
// each post's "link_counts" array to ProcessLink, which prints the external ones.
if (args.Length != 1)
{
    Console.WriteLine("Usage: Program <DiscourseTopicUrl>");
    return;
}

// Drop trailing slashes so that appending ".json" gives ".../1234.json" instead of ".../1234/.json".
string discourseTopic = args[0].TrimEnd('/');
int pageNumber = 0;

// Disposed when the program ends; a single instance is reused for every page.
using HttpClient client = new();
client.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; Chrome)");

Console.WriteLine();

while (true)
{
    pageNumber++;
    string currentPage = $"{discourseTopic}.json?page={pageNumber}";

    try
    {
        string response = await client.GetStringAsync(currentPage);
        using JsonDocument document = JsonDocument.Parse(response);

        // Validate the shape instead of letting GetProperty throw on an unexpected payload.
        JsonElement root = document.RootElement;
        if (root.ValueKind != JsonValueKind.Object ||
            !root.TryGetProperty("post_stream", out JsonElement postStream) ||
            postStream.ValueKind != JsonValueKind.Object ||
            !postStream.TryGetProperty("posts", out JsonElement allPosts) ||
            allPosts.ValueKind != JsonValueKind.Array)
        {
            Console.Error.WriteLine($"Unexpected response from {currentPage}: no post_stream.posts array found.");
            break;
        }

        // Guard against looping forever if a page is returned successfully but holds no posts.
        if (allPosts.GetArrayLength() == 0)
        {
            break;
        }

        foreach (JsonElement post in allPosts.EnumerateArray())
        {
            if (!post.TryGetProperty("link_counts", out JsonElement allLinks) ||
                allLinks.ValueKind != JsonValueKind.Array)
            {
                continue;
            }

            foreach (JsonElement link in allLinks.EnumerateArray())
            {
                ProcessLink(link, post);
            }
        }
    }
    catch (HttpRequestException ex) when (pageNumber > 1 && ex.StatusCode == System.Net.HttpStatusCode.NotFound)
    {
        // A failed request is how the loop detects the end of the topic
        // (presumably the server answers 404 for a page past the last one - TODO confirm).
        break;
    }
    catch (HttpRequestException ex)
    {
        // First-page failures and non-404 errors are reported instead of being swallowed.
        Console.Error.WriteLine($"Request for {currentPage} failed: {ex.Message}");
        break;
    }
    catch (JsonException ex)
    {
        Console.Error.WriteLine($"Response from {currentPage} is not valid JSON: {ex.Message}");
        break;
    }
}

return;
// Prints details for one "link_counts" entry of a post when that entry is external.
//
// link: one element of the post's "link_counts" array; it is ignored unless it has an
//       "internal" property whose value is the JSON literal false.
// post: the post element the link belongs to; its "post_number", "username" and "cooked"
//       properties are read.
//
// Writes to the console: the post number and author, the URL, and the text of the first
// <a> element in "cooked" whose href equals the URL (or "No link text found").
// Throws InvalidOperationException when "url", "username" or "cooked" is JSON null.
static void ProcessLink(JsonElement link, JsonElement post)
{
    if (!link.TryGetProperty("internal", out JsonElement internalLink) ||
        internalLink.ValueKind != JsonValueKind.False)
    {
        return;
    }

    int postNumber = post.GetProperty("post_number").GetInt32();

    string url = link.GetProperty("url").GetString()
        ?? throw new InvalidOperationException($"link: {link} url property from the parsed json post: {post} is null!");

    string username = post.GetProperty("username").GetString()
        ?? throw new InvalidOperationException($"link: {link} username property from the parsed json post: {post} is null!");

    string postBody = post.GetProperty("cooked").GetString()
        ?? throw new InvalidOperationException($"link: {link} cooked property from the parsed json post: {post} is null!");

    // The rendered HTML may carry the URL with "&" written as "&amp;",
    // so accept either spelling inside the href attribute.
    string rawUrl = Regex.Escape(url);
    string encodedUrl = Regex.Escape(url.Replace("&", "&amp;"));
    string pattern = $"""<a\s+[^>]*href="(?:{rawUrl}|{encodedUrl})"[^>]*>(.*?)</a>""";

    Console.WriteLine($"External link found in post number: {postNumber} by {username}");
    Console.WriteLine(url);

    // Singleline lets "." match newlines so anchor text spanning several lines is still captured.
    Match match = Regex.Match(postBody, pattern, RegexOptions.Singleline);
    if (match.Success)
    {
        string linkText = match.Groups[1].Value;
        Console.WriteLine($"Link Text: {linkText}\n");
    }
    else
    {
        Console.WriteLine("No link text found\n");
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment