Created
February 19, 2016 13:30
-
-
Save danielearwicker/0da828e0e8f7680b0d4a to your computer and use it in GitHub Desktop.
Simple, minimal backup of a MediaWiki site (page text export plus uploaded images)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Specialized;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
namespace WikiBackup
{
    /// <summary>
    /// Console tool that takes a minimal backup of a MediaWiki site: an XML export of
    /// the pages linked from Special:AllPages, plus the original files behind the
    /// thumbnails shown on Special:NewFiles.
    /// </summary>
    class Program
    {
        private const string BaseUrl = "http://wiki/mediawiki/";
        private const string IndexUrl = BaseUrl + "index.php/";
        private const string ExportUrl = BaseUrl + "index.php?title=Special:Export&action=submit";
        private const string BackupPath = @"\\fileserver\home$\daniele\WikiBackups";

        /// <summary>Number of files kept in the text backup directory after each run.</summary>
        private const int MaxTextBackups = 10;

        // Marker strings that delimit the table of page links inside Special:AllPages.
        private const string AllPagesPrefix = "<table class=\"mw-allpages-table-chunk\"><tr><td style=\"width:33%\">";
        private const string AllPagesSuffix = "</table>";

        // Captures the title attribute of every page link inside the AllPages table.
        private static readonly Regex LinkPattern = new Regex(@"\<a href=""[^""]+"" title=""([^""]+)""\>");

        // Captures the path (below images/thumb/) of every thumbnail on Special:NewFiles.
        private static readonly Regex FilePattern = new Regex(@"\<img alt=""\(thumbnail\)"" src=""/mediawiki/images/thumb/([^""]+)""");

        /// <summary>
        /// Exports the pages linked from Special:AllPages through Special:Export into a
        /// timestamped XML file under the "Text" backup directory, then removes the
        /// oldest files so that at most <see cref="MaxTextBackups"/> remain.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The Special:AllPages markup did not contain the expected table, or no page
        /// titles could be extracted from it.
        /// </exception>
        static void BackupText()
        {
            var backupTextPath = Path.Combine(BackupPath, "Text");

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(backupTextPath);

            // NOTE(review): only the links present on the first Special:AllPages response
            // are collected - confirm whether this wiki is large enough to paginate.
            var pageTitles = ExtractPageTitles(DownloadPage(IndexUrl + "Special:AllPages"));
            if (pageTitles.Length == 0)
            {
                // An empty export would still rotate out a good backup below, so fail instead.
                throw new InvalidOperationException("Unexpected data from Special:AllPages");
            }

            var values = new NameValueCollection { { "pages", string.Join("\n", pageTitles) } };

            using (var client = new WebClient())
            {
                client.Headers.Add("Content-Type", "application/x-www-form-urlencoded");
                var result = client.UploadValues(ExportUrl, "POST", values);

                // "HH" (24-hour clock) keeps names unique across AM/PM and makes an
                // ordinal sort of the file names chronological, which pruning relies on.
                var timestamp = DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss", CultureInfo.InvariantCulture);
                File.WriteAllBytes(Path.Combine(backupTextPath, timestamp + ".xml"), result);
            }

            PruneOldBackups(backupTextPath, MaxTextBackups);
        }

        /// <summary>
        /// Downloads the original file for every thumbnail shown on Special:NewFiles
        /// into the "Images" backup directory, overwriting files of the same name.
        /// </summary>
        static void BackupImages()
        {
            var backupImagesPath = Path.Combine(BackupPath, "Images");
            Directory.CreateDirectory(backupImagesPath);

            var newFilesRaw = DownloadPage(IndexUrl + "Special:NewFiles");
            var thumbPaths = FilePattern.Matches(newFilesRaw)
                                        .OfType<Match>()
                                        .Select(m => WebUtility.HtmlDecode(m.Groups[1].Value))
                                        .ToArray();

            // One client is enough: the downloads run strictly one after another.
            using (var client = new WebClient())
            {
                foreach (var thumbPath in thumbPaths)
                {
                    // The first three segments of the thumbnail path are used as the
                    // path of the full-size file below images/; the rest is dropped.
                    var fileNameParts = thumbPath.Split('/').Take(3).ToArray();
                    var fileData = client.DownloadData(BaseUrl + "images/" + string.Join("/", fileNameParts));
                    File.WriteAllBytes(Path.Combine(backupImagesPath, fileNameParts.Last()), fileData);
                }
            }
        }

        /// <summary>Runs the text backup followed by the image backup.</summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            BackupText();
            BackupImages();
        }

        /// <summary>Downloads <paramref name="url"/> as a string and disposes the client.</summary>
        /// <param name="url">Absolute URL to fetch.</param>
        /// <returns>The response body.</returns>
        private static string DownloadPage(string url)
        {
            using (var client = new WebClient())
            {
                return client.DownloadString(url);
            }
        }

        /// <summary>
        /// Pulls the page titles out of the link table in the Special:AllPages markup.
        /// </summary>
        /// <param name="allPagesHtml">Raw HTML returned for Special:AllPages.</param>
        /// <returns>The decoded title of every link found in the table.</returns>
        /// <exception cref="InvalidOperationException">
        /// The table start or end marker is missing from <paramref name="allPagesHtml"/>.
        /// </exception>
        private static string[] ExtractPageTitles(string allPagesHtml)
        {
            var prefixPos = allPagesHtml.IndexOf(AllPagesPrefix, StringComparison.Ordinal);
            if (prefixPos == -1)
            {
                throw new InvalidOperationException("Unexpected data from Special:AllPages");
            }

            var table = allPagesHtml.Substring(prefixPos + AllPagesPrefix.Length);

            var suffixPos = table.IndexOf(AllPagesSuffix, StringComparison.Ordinal);
            if (suffixPos == -1)
            {
                throw new InvalidOperationException("Unexpected data from Special:AllPages");
            }

            table = table.Substring(0, suffixPos);

            // Attribute values are HTML-encoded (&amp;, &#039;, ...); Special:Export
            // needs the real titles, so decode them before they are posted.
            return LinkPattern.Matches(table)
                              .OfType<Match>()
                              .Select(m => WebUtility.HtmlDecode(m.Groups[1].Value))
                              .ToArray();
        }

        /// <summary>
        /// Deletes the files that sort first by name in <paramref name="directory"/>
        /// until at most <paramref name="keep"/> files remain. Deletion is best-effort:
        /// a file that cannot be removed is reported on stderr and skipped.
        /// </summary>
        /// <param name="directory">Directory whose files are pruned.</param>
        /// <param name="keep">Number of files (the last ones by name) to retain.</param>
        private static void PruneOldBackups(string directory, int keep)
        {
            var files = Directory.EnumerateFiles(directory).ToList();

            // Ordinal comparison so the order does not depend on the current culture.
            files.Sort(StringComparer.Ordinal);

            // Take() yields nothing for a non-positive count, i.e. when there are
            // no more than `keep` files.
            foreach (var old in files.Take(files.Count - keep))
            {
                try
                {
                    File.Delete(old);
                }
                catch (IOException ex)
                {
                    Console.Error.WriteLine("Could not delete old backup '" + old + "': " + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    Console.Error.WriteLine("Could not delete old backup '" + old + "': " + ex.Message);
                }
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment