cwe1ss · September 22, 2022 13:53
diff --git a/index.js b/index.js
 const fs = require('fs/promises')
 const path = require('path')
 //const util = require('util')
 const sanitize = require("sanitize-filename")
 const HTMLParser = require('node-html-parser')

 const TurndownService = require('turndown')
 const TurndownPluginGfmService = require('@guyplusplus/turndown-plugin-gfm')
 const TurndownPluginConfluenceToGfmService = require('turndown-plugin-confluence-to-gfm')

 const turndownService = new TurndownService()
 TurndownPluginGfmService.gfm(turndownService)
 TurndownPluginConfluenceToGfmService.confluenceGfm(turndownService)


 ///////////////////////////////////////
 // Source and destination folders

 // A directory which has been exported from Confluence using the HTML export.
 // Can be overridden by the first command line argument:
 //   node index.js <htmlDirectory> <markdownDirectory>
 const htmlDirectory = process.argv[2] ?? 'C:/temp/Confluence-export/ABC'

 // The target directory (second command line argument, same default as before).
 const markdownDirectory = process.argv[3] ?? 'c:/temp/AzDevOps/docs/ABC'


 ; (async () => {

    const newAttachmentsDirectoryName = '.attachments'

    ///////////////////////////////////////
    // Read folder structure, page titles, etc from index.html

    var addChildren = function(ul, result, parentFolders) {
        if (!ul) {
            return
        }

        for (const listItemNode of ul.childNodes) {
            if (listItemNode.tagName != 'LI') continue;
            
            var obj = {
                htmlFileName: '',
                markdownFileName: '',
                folderName: null,
                parents: parentFolders
            }

            result.push(obj)

            var addAsParent = true

            for (const listItemChildNode of listItemNode.childNodes) {
                if (listItemChildNode.tagName == 'A') {
                    obj.htmlFileName = listItemChildNode.attributes['href']
                    
                    var pageTitle = listItemChildNode.innerText

                    pageTitle = pageTitle
                        .replaceAll('&amp;', '&')
                        .replaceAll('&#39;', '\'')
                        .replaceAll('&quot;', '"')
                        .replaceAll('/', '|')
                        // https://learn.microsoft.com/en-us/azure/devops/project/wiki/wiki-file-structure?view=azure-devops#special-characters-in-wiki-page-titles
                        .replaceAll(':', '%3A')
                        .replaceAll('<', '%3C')
                        .replaceAll('>', '%3E')
                        .replaceAll('*', '%2A')
                        .replaceAll('?', '%3F')
                        .replaceAll('|', '%7C')
                        .replaceAll('-', '%2D')
                        .replaceAll('"', '%22')

                    var sanitizedPageTitle = sanitize(pageTitle)

                    // Azure DevOps Wiki doesn't allow spaces
                    sanitizedPageTitle = sanitizedPageTitle.replaceAll(' ', '-')

                    obj.markdownFileName = sanitizedPageTitle + '.md'
                    obj.folderName = sanitizedPageTitle
                }

                if (listItemChildNode.tagName == 'IMG') {
                    // The sole root item is the home page (with an img tag to recognize it) and we don't need a separate folder for it.
                    addAsParent = false
                }

                if (listItemChildNode.tagName == 'UL') {
                    const newParents = [...parentFolders]
                    if (addAsParent) {
                        newParents.push(obj.folderName)
                    }
                    
                    addChildren(listItemChildNode, result, newParents)
                }
            }
        }
    }

    var getFiles = async function() {
        const indexFilePath = path.join(htmlDirectory, 'index.html')
        const htmlContent = await fs.readFile(indexFilePath, { encoding: 'utf-8'} )
    
        const html = HTMLParser.parse(htmlContent)
        const pageSection = html.querySelector('div.pageSection ul')

        var files = []
        addChildren(pageSection, files, [])

        files.unshift({
            htmlFileName: 'index.html',
            markdownFileName: 'index.md',
            pageTitle: html.querySelector('head title').innerText,
            folderName: null,
            parents: []
        })

        return files
    }

    var files = await getFiles()

    // console.log(util.inspect(files, { depth: null, colors: true }))
    // return

    ///////////////////////////////////////
    // Copy "attachments"-directory to ".attachments" (Azure DevOps requires this name)

    const existingAttachmentsDirectory = path.join(htmlDirectory, 'attachments')
    const newAttachmentsDirectory = path.join(markdownDirectory, newAttachmentsDirectoryName)

    var copyRecursiveSync = async function(src, dest) {
        // https://stackoverflow.com/a/22185855

        var stats = await fs.stat(src)
        var isDirectory = stats.isDirectory()
        if (isDirectory) {
            await fs.mkdir(dest, { recursive: true })
            const files = await fs.readdir(src)
            for (const childItemName of files) {
                await copyRecursiveSync(path.join(src, childItemName), path.join(dest, childItemName))
            }
        } else {
            await fs.copyFile(src, dest)
        }
    };
    await copyRecursiveSync(existingAttachmentsDirectory, newAttachmentsDirectory)

    ///////////////////////////////////////
    // Convert pages

    for (const file of files) {
        const htmlFileFullPath = path.join(htmlDirectory, file.htmlFileName)
        const markdownFileDirectory = path.join(markdownDirectory, ...file.parents)
        const markdownFileFullPath = path.join(markdownFileDirectory, file.markdownFileName)

        console.log(htmlFileFullPath + " --> " + markdownFileFullPath)

        ///////////////////////////////////////
        // Load file content

        var htmlContent = await fs.readFile(htmlFileFullPath, { encoding: 'utf-8'} )

        ///////////////////////////////////////
        // Replace links

        for (const linkFile of files) {
            var target = '../'.repeat(file.parents.length)
            target += linkFile.parents.join('/') + (linkFile.parents.length > 0 ? '/' : '')
            target += linkFile.markdownFileName
            
            //console.log('  - ' + linkFile.htmlFileName + ': ' + target)

            htmlContent = htmlContent.replaceAll(linkFile.htmlFileName, target)
        }

        ///////////////////////////////////////
        // Replace attachment links (since we place files in subfolders)

        const attachmentsUrl = '../'.repeat(file.parents.length) + newAttachmentsDirectoryName + '/'
        htmlContent = htmlContent.replaceAll('"attachments/', '"' + attachmentsUrl)
        
        var html = HTMLParser.parse(htmlContent)
        
        ///////////////////////////////////////
        // Remove breadcrumbs section (as we have the navigation bar anyway)

        var htmlBreadcrumbs = html.querySelector('#breadcrumbs')
        if (htmlBreadcrumbs) {
            htmlBreadcrumbs.set_content('')
        }

        ///////////////////////////////////////
        // Remove footer (as that only contains info about when the HTML document was generated and a link to Atlassian)

        var htmlFooter = html.querySelector('#footer')
        if (htmlFooter) {
            htmlFooter.set_content('')
        }

        ///////////////////////////////////////
        // Remove page title (as that would output the page title twice)

        var htmlPageTitle = html.querySelector('title')
        if (htmlPageTitle) {
            htmlPageTitle.set_content('')
        }

        ///////////////////////////////////////
        // Remove header from content (as Azure DevOps displays a title based on the file name)

        var htmlMainHeader = html.querySelector('#main-header')
        htmlMainHeader.set_content('')

        ///////////////////////////////////////
        // Remove query strings from attachment includes (as this doesn't display in Azure DevOps)

        var htmlImages = html.querySelectorAll('img')
        for (const htmlImage of htmlImages) {
            const src = htmlImage.attributes['src']
            if (src.indexOf(newAttachmentsDirectoryName) >= 0) {
                var indexOfQuery = src.indexOf('?')
                if (indexOfQuery >= 0) {
                    const newSrc = src.substring(0, indexOfQuery)
                    htmlImage.setAttribute('src', newSrc)
                }
            }
        }

        ///////////////////////////////////////
        // Remove Page metadata (author, creation time) (since we don't need them in DevOps)

        var htmlPageMetadata = html.querySelector('#content div.page-metadata')
        htmlPageMetadata?.set_content('')

        ///////////////////////////////////////
        // Show "information macro"-widgets as blockquotes

        const macroBodies = html.querySelectorAll('div.confluence-information-macro > p.title, div.confluence-information-macro-body, div.confluence-information-macro-body p, div.confluence-information-macro-body div, div.confluence-information-macro-body li, div.confluence-information-macro-body pre')
        for (const macroBody of macroBodies) {
            if (macroBody.innerText?.trim() != '')
                macroBody.insertAdjacentHTML('afterbegin', '{{tmp:blockquote}}')
        }

        ///////////////////////////////////////
        // Replace known custom scripts

        const attachments = html.querySelector('#attachments')?.parentNode?.nextElementSibling?.querySelectorAll('a')
        const customScripts = html.querySelectorAll('script.ap-iframe-body-script')

        for (const customScript of customScripts) {
            const scriptContent = customScript.innerHTML

            if (scriptContent.indexOf('com.gliffy.integration.confluence') >= 0) {
                ///////////////////////////////////////
                // Gliffy Diagrams

                const gliffyExtractPageIdRegex = /(?:.*container=|^)(\d+)/
                const gliffyExtractAttachmentIdRegex = /(?:.*imageAttachmentId=att|^)(\d+)/
                const gliffyExtractNameRegex = /(?:.*[\|"]name=)([^|\\]*)/

                const pageIdMatch = scriptContent.match(gliffyExtractPageIdRegex)
                if (!pageIdMatch || pageIdMatch.length < 2) continue;

                // The attachment id is not easy to resolve. If we can't get it directly, we need to find it by name.
                var attachmentId = ''
                const attachmentIdMatch = scriptContent.match(gliffyExtractAttachmentIdRegex)
                if (attachmentIdMatch && attachmentIdMatch.length > 1) {
                    attachmentId = attachmentIdMatch[1]
                } else {
                    // Find the image by attachment name and use the most recent one
                    const nameMatch = scriptContent.match(gliffyExtractNameRegex)
                    if (!nameMatch || nameMatch.length < 2) continue;

                    const imageAttachments = attachments.filter(x => x.innerText == nameMatch[1] + '.png')
                    if (imageAttachments.length > 0) {
                        const latestAttachment = imageAttachments[imageAttachments.length - 1]
                        const href = latestAttachment.attributes['href']
                        attachmentId = href.substring(href.lastIndexOf('/') + 1, href.lastIndexOf('.'))
                    }
                }

                //console.log('PageId: ' + pageId[1] + '; AttachmentId: ' + attachmentId[1])

                const attachmentUrl = attachmentsUrl + pageIdMatch[1] + '/' + attachmentId + '.png'

                customScript.set_content('')
                customScript.insertAdjacentHTML('beforebegin', '<img src="' + attachmentUrl + '" />')

            } else if (scriptContent.indexOf('com.balsamiq.mockups.confluence') >= 0) {
                ///////////////////////////////////////
                // Balsamiq Mockups

                const balsamiqExtractPageIdRegex = /(?:.*pageid=|^)(\d+)/
                const balsamiqExtractDownloadLinkRegex = /(?:.*[\|"]downloadLink=)([^|\\]*)/
                const balsamiqExtractNameRegex = /(?:.*&name=)([^&\"]*)/

                const pageIdMatch = scriptContent.match(balsamiqExtractPageIdRegex)
                if (!pageIdMatch || pageIdMatch.length < 2) continue;

                // Find the image by attachment name and use the most recent one
                var attachmentUrl = null
                const downloadLinkMatch = scriptContent.match(balsamiqExtractDownloadLinkRegex)
                if (downloadLinkMatch && downloadLinkMatch.length > 1) {
                    const downloadLink = downloadLinkMatch[1]
                    const lastIndexOfSlash = downloadLink.lastIndexOf('/')
                    const fileName = downloadLink.substring(lastIndexOfSlash + 1)
    
                    const imageAttachments = attachments.filter(x => x.innerText == fileName)
                    if (imageAttachments.length > 0) {
                        const latestAttachment = imageAttachments[imageAttachments.length - 1]
                        const href = latestAttachment.attributes['href']
                        const attachmentId = href.substring(href.lastIndexOf('/') + 1, href.lastIndexOf('.'))
    
                        //console.log('PageId: ' + pageId[1] + '; AttachmentId: ' + attachmentId[1])
                        attachmentUrl = attachmentsUrl + pageIdMatch[1] + '/' + attachmentId + '.png'
                    }
                } else {
                    const nameMatch = scriptContent.match(balsamiqExtractNameRegex)
                    if (nameMatch && nameMatch.length > 1) {
                        const name = decodeURIComponent(nameMatch[1].replaceAll('+', ' '))
                        console.log('- ' + name)

                        const imageAttachments = attachments.filter(x => x.innerText == 'mockup_' + name + '.png')
                        if (imageAttachments.length > 0) {
                            const latestAttachment = imageAttachments[imageAttachments.length - 1]
                            const href = latestAttachment.attributes['href']
                            const attachmentId = href.substring(href.lastIndexOf('/') + 1, href.lastIndexOf('.'))

                            attachmentUrl = attachmentsUrl + pageIdMatch[1] + '/' + attachmentId + '.png'
                        }
                    }
                }
                
                if (attachmentUrl) {
                    customScript.set_content('')
                    customScript.insertAdjacentHTML('beforebegin', '<img src="' + attachmentUrl + '" />')
                }
            }
        }

        ///////////////////////////////////////
        // Fix double strong text (<strong>Some <strong>text</strong> example</strong>)

        const doubleStrong = html.querySelectorAll('strong strong')
        for (const strong of doubleStrong) {
            strong.insertAdjacentHTML('beforebegin', strong.innerHTML)
            strong.set_content('')
        }

        ///////////////////////////////////////
        // Convert Attachments to list

        const attachmentsContainer = html.querySelector('#attachments')?.parentNode?.nextElementSibling
        if (attachmentsContainer) {
            attachmentsContainer.set_content(
                '<ul>' + 
                attachmentsContainer.innerHTML.replaceAll('<img', '<li><img').replaceAll('<br>', '</li>') + 
                '</ul>')
        }

        ///////////////////////////////////////
        // Convert HTML to Markdown

        var markdown = turndownService.turndown(html.toString())

        ///////////////////////////////////////
        // Trim lines at the end (we can't trim at the start since Markdown relies on indentation at the beginning)
        // Fix Quotations
        var lines = markdown.split('\n')
        for (let i = 0; i < lines.length; i++) {
            var line = lines[i]

            if (line.indexOf('{{tmp:blockquote}}') >= 0) {
                line = '> ' + line.replaceAll('{{tmp:blockquote}}', '')
                if (line.trim() == '>') {
                    line = ''
                }
            }

            lines[i] = line.trimEnd()
        }
        markdown = lines.join('\n')

        ///////////////////////////////////////
        // Remove excess empty lines (more than one empty line between text)

        var oldMarkdown = ''
        do {
            oldMarkdown = markdown
            markdown = markdown.replaceAll('\n\n\n', '\n\n')
        } while (oldMarkdown != markdown)

        ///////////////////////////////////////
        // Remove empty lines between quotes (as that shows up as separate blockquote-items)

        lines = markdown.split('\n')
        for (let i = 0; i < lines.length; i++) {
            const previousLine = i > 0 ? lines[i - 1] : null
            const currentLine = lines[i]
            const nextLine = i < lines.length - 1 ? lines[i + 1] : null
            
            if (previousLine && previousLine.startsWith('> ') && nextLine && nextLine.startsWith('> ') && currentLine.trim() == '') {
                lines[i] = '>'
            }
        }
        markdown = lines.join('\n').trim();

        ///////////////////////////////////////
        // Write markdown to disk

        await fs.mkdir(markdownFileDirectory, { recursive: true })
        await fs.writeFile(markdownFileFullPath, markdown)
    }

    ///////////////////////////////////////
    // Update .order file (we currently only support the root level)

    const orderFilePath = path.join(markdownDirectory, '.order')
    var orderFileContent = ''
    for (const file of files) {
        if (file.parents.length == 0) {
            orderFileContent += file.markdownFileName.replace('.md', '') + '\n'
        }
    }

    await fs.writeFile(orderFilePath, orderFileContent)

    console.log('finished')
 })()
diff --git a/package.json b/package.json
 {
  "name": "confluence-export",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1",
    "start": "node index.js"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "node-html-parser": "^6.1.0",
    "sanitize-filename": "^1.6.3",
    "turndown-plugin-confluence-to-gfm": "^0.5.0"
  }
 }
	const fs = require('fs/promises')
	const path = require('path')
	//const util = require('util')
	const sanitize = require("sanitize-filename")
	const HTMLParser = require('node-html-parser')

	const TurndownService = require('turndown')
	const TurndownPluginGfmService = require('@guyplusplus/turndown-plugin-gfm')
	const TurndownPluginConfluenceToGfmService = require('turndown-plugin-confluence-to-gfm')

	const turndownService = new TurndownService()
	TurndownPluginGfmService.gfm(turndownService)
	TurndownPluginConfluenceToGfmService.confluenceGfm(turndownService)


	///////////////////////////////////////
	// Source and destination folders

	// A directory which has been exported from Confluence using the HTML export.
	const htmlDirectory = 'C:/temp/Confluence-export/ABC'

	// The target directory
	const markdownDirectory = 'c:/temp/AzDevOps/docs/ABC'


	; (async () => {

	const newAttachmentsDirectoryName = '.attachments'

	///////////////////////////////////////
	// Read folder structure, page titles, etc from index.html

	var addChildren = function(ul, result, parentFolders) {
	if (!ul) {
	return
	}

	for (const listItemNode of ul.childNodes) {
	if (listItemNode.tagName != 'LI') continue;

	var obj = {
	htmlFileName: '',
	markdownFileName: '',
	folderName: null,
	parents: parentFolders
	}

	result.push(obj)

	var addAsParent = true

	for (const listItemChildNode of listItemNode.childNodes) {
	if (listItemChildNode.tagName == 'A') {
	obj.htmlFileName = listItemChildNode.attributes['href']

	var pageTitle = listItemChildNode.innerText

	pageTitle = pageTitle
	.replaceAll('&amp;', '&')
	.replaceAll('&#39;', '\'')
	.replaceAll('&quot;', '"')
	.replaceAll('/', '|')
	// https://learn.microsoft.com/en-us/azure/devops/project/wiki/wiki-file-structure?view=azure-devops#special-characters-in-wiki-page-titles
	.replaceAll(':', '%3A')
	.replaceAll('<', '%3C')
	.replaceAll('>', '%3E')
	.replaceAll('*', '%2A')
	.replaceAll('?', '%3F')
	.replaceAll('|', '%7C')
	.replaceAll('-', '%2D')
	.replaceAll('"', '%22')

	var sanitizedPageTitle = sanitize(pageTitle)

	// Azure DevOps Wiki doesn't allow spaces
	sanitizedPageTitle = sanitizedPageTitle.replaceAll(' ', '-')

	obj.markdownFileName = sanitizedPageTitle + '.md'
	obj.folderName = sanitizedPageTitle
	}

	if (listItemChildNode.tagName == 'IMG') {
	// The sole root item is the home page (with an img tag to recognize it) and we don't need a separate folder for it.
	addAsParent = false
	}

	if (listItemChildNode.tagName == 'UL') {
	const newParents = [...parentFolders]
	if (addAsParent) {
	newParents.push(obj.folderName)
	}

	addChildren(listItemChildNode, result, newParents)
	}
	}
	}
	}

	var getFiles = async function() {
	const indexFilePath = path.join(htmlDirectory, 'index.html')
	const htmlContent = await fs.readFile(indexFilePath, { encoding: 'utf-8'} )

	const html = HTMLParser.parse(htmlContent)
	const pageSection = html.querySelector('div.pageSection ul')

	var files = []
	addChildren(pageSection, files, [])

	files.unshift({
	htmlFileName: 'index.html',
	markdownFileName: 'index.md',
	pageTitle: html.querySelector('head title').innerText,
	folderName: null,
	parents: []
	})

	return files
	}

	var files = await getFiles()

	// console.log(util.inspect(files, { depth: null, colors: true }))
	// return

	///////////////////////////////////////
	// Copy "attachments"-directory to ".attachments" (Azure DevOps requires this name)

	const existingAttachmentsDirectory = path.join(htmlDirectory, 'attachments')
	const newAttachmentsDirectory = path.join(markdownDirectory, newAttachmentsDirectoryName)

	var copyRecursiveSync = async function(src, dest) {
	// https://stackoverflow.com/a/22185855

	var stats = await fs.stat(src)
	var isDirectory = stats.isDirectory()
	if (isDirectory) {
	await fs.mkdir(dest, { recursive: true })
	const files = await fs.readdir(src)
	for (const childItemName of files) {
	await copyRecursiveSync(path.join(src, childItemName), path.join(dest, childItemName))
	}
	} else {
	await fs.copyFile(src, dest)
	}
	};
	await copyRecursiveSync(existingAttachmentsDirectory, newAttachmentsDirectory)

	///////////////////////////////////////
	// Convert pages

	for (const file of files) {
	const htmlFileFullPath = path.join(htmlDirectory, file.htmlFileName)
	const markdownFileDirectory = path.join(markdownDirectory, ...file.parents)
	const markdownFileFullPath = path.join(markdownFileDirectory, file.markdownFileName)

	console.log(htmlFileFullPath + " --> " + markdownFileFullPath)

	///////////////////////////////////////
	// Load file content

	var htmlContent = await fs.readFile(htmlFileFullPath, { encoding: 'utf-8'} )

	///////////////////////////////////////
	// Replace links

	for (const linkFile of files) {
	var target = '../'.repeat(file.parents.length)
	target += linkFile.parents.join('/') + (linkFile.parents.length > 0 ? '/' : '')
	target += linkFile.markdownFileName

	//console.log(' - ' + linkFile.htmlFileName + ': ' + target)

	htmlContent = htmlContent.replaceAll(linkFile.htmlFileName, target)
	}

	///////////////////////////////////////
	// Replace attachment links (since we place files in subfolders)

	const attachmentsUrl = '../'.repeat(file.parents.length) + newAttachmentsDirectoryName + '/'
	htmlContent = htmlContent.replaceAll('"attachments/', '"' + attachmentsUrl)

	var html = HTMLParser.parse(htmlContent)

	///////////////////////////////////////
	// Remove breadcrumbs section (as we have the navigation bar anyway)

	var htmlBreadcrumbs = html.querySelector('#breadcrumbs')
	if (htmlBreadcrumbs) {
	htmlBreadcrumbs.set_content('')
	}

	///////////////////////////////////////
	// Remove footer (as that only contains info about when the HTML document was generated and a link to Atlassian)

	var htmlFooter = html.querySelector('#footer')
	if (htmlFooter) {
	htmlFooter.set_content('')
	}

	///////////////////////////////////////
	// Remove page title (as that would output the page title twice)

	var htmlPageTitle = html.querySelector('title')
	if (htmlPageTitle) {
	htmlPageTitle.set_content('')
	}

	///////////////////////////////////////
	// Remove header from content (as Azure DevOps displays a title based on the file name)

	var htmlMainHeader = html.querySelector('#main-header')
	htmlMainHeader.set_content('')

	///////////////////////////////////////
	// Remove query strings from attachment includes (as this doesn't display in Azure DevOps)

	var htmlImages = html.querySelectorAll('img')
	for (const htmlImage of htmlImages) {
	const src = htmlImage.attributes['src']
	if (src.indexOf(newAttachmentsDirectoryName) >= 0) {
	var indexOfQuery = src.indexOf('?')
	if (indexOfQuery >= 0) {
	const newSrc = src.substring(0, indexOfQuery)
	htmlImage.setAttribute('src', newSrc)
	}
	}
	}

	///////////////////////////////////////
	// Remove Page metadata (author, creation time) (since we don't need them in DevOps)

	var htmlPageMetadata = html.querySelector('#content div.page-metadata')
	htmlPageMetadata?.set_content('')

	///////////////////////////////////////
	// Show "information macro"-widgets as blockquotes

	const macroBodies = html.querySelectorAll('div.confluence-information-macro > p.title, div.confluence-information-macro-body, div.confluence-information-macro-body p, div.confluence-information-macro-body div, div.confluence-information-macro-body li, div.confluence-information-macro-body pre')
	for (const macroBody of macroBodies) {
	if (macroBody.innerText?.trim() != '')
	macroBody.insertAdjacentHTML('afterbegin', '{{tmp:blockquote}}')
	}

	///////////////////////////////////////
	// Replace known custom scripts

	const attachments = html.querySelector('#attachments')?.parentNode?.nextElementSibling?.querySelectorAll('a')
	const customScripts = html.querySelectorAll('script.ap-iframe-body-script')

	for (const customScript of customScripts) {
	const scriptContent = customScript.innerHTML

	if (scriptContent.indexOf('com.gliffy.integration.confluence') >= 0) {
	///////////////////////////////////////
	// Gliffy Diagrams

	const gliffyExtractPageIdRegex = /(?:.*container=|^)(\d+)/
	const gliffyExtractAttachmentIdRegex = /(?:.*imageAttachmentId=att|^)(\d+)/
	const gliffyExtractNameRegex = /(?:.*[\|"]name=)([^|\\]*)/

	const pageIdMatch = scriptContent.match(gliffyExtractPageIdRegex)
	if (!pageIdMatch || pageIdMatch.length < 2) continue;

	// The attachment id is not easy to resolve. If we can't get it directly, we need to find it by name.
	var attachmentId = ''
	const attachmentIdMatch = scriptContent.match(gliffyExtractAttachmentIdRegex)
	if (attachmentIdMatch && attachmentIdMatch.length > 1) {
	attachmentId = attachmentIdMatch[1]
	} else {
	// Find the image by attachment name and use the most recent one
	const nameMatch = scriptContent.match(gliffyExtractNameRegex)
	if (!nameMatch || nameMatch.length < 2) continue;

	const imageAttachments = attachments.filter(x => x.innerText == nameMatch[1] + '.png')
	if (imageAttachments.length > 0) {
	const latestAttachment = imageAttachments[imageAttachments.length - 1]
	const href = latestAttachment.attributes['href']
	attachmentId = href.substring(href.lastIndexOf('/') + 1, href.lastIndexOf('.'))
	}
	}

	//console.log('PageId: ' + pageId[1] + '; AttachmentId: ' + attachmentId[1])

	const attachmentUrl = attachmentsUrl + pageIdMatch[1] + '/' + attachmentId + '.png'

	customScript.set_content('')
	customScript.insertAdjacentHTML('beforebegin', '<img src="' + attachmentUrl + '" />')

	} else if (scriptContent.indexOf('com.balsamiq.mockups.confluence') >= 0) {
	///////////////////////////////////////
	// Balsamiq Mockups

	const balsamiqExtractPageIdRegex = /(?:.*pageid=|^)(\d+)/
	const balsamiqExtractDownloadLinkRegex = /(?:.*[\|"]downloadLink=)([^|\\]*)/
	const balsamiqExtractNameRegex = /(?:.*&name=)([^&\"]*)/

	const pageIdMatch = scriptContent.match(balsamiqExtractPageIdRegex)
	if (!pageIdMatch || pageIdMatch.length < 2) continue;

	// Find the image by attachment name and use the most recent one
	var attachmentUrl = null
	const downloadLinkMatch = scriptContent.match(balsamiqExtractDownloadLinkRegex)
	if (downloadLinkMatch && downloadLinkMatch.length > 1) {
	const downloadLink = downloadLinkMatch[1]
	const lastIndexOfSlash = downloadLink.lastIndexOf('/')
	const fileName = downloadLink.substring(lastIndexOfSlash + 1)

	const imageAttachments = attachments.filter(x => x.innerText == fileName)
	if (imageAttachments.length > 0) {
	const latestAttachment = imageAttachments[imageAttachments.length - 1]
	const href = latestAttachment.attributes['href']
	const attachmentId = href.substring(href.lastIndexOf('/') + 1, href.lastIndexOf('.'))

	//console.log('PageId: ' + pageId[1] + '; AttachmentId: ' + attachmentId[1])
	attachmentUrl = attachmentsUrl + pageIdMatch[1] + '/' + attachmentId + '.png'
	}
	} else {
	const nameMatch = scriptContent.match(balsamiqExtractNameRegex)
	if (nameMatch && nameMatch.length > 1) {
	const name = decodeURIComponent(nameMatch[1].replaceAll('+', ' '))
	console.log('- ' + name)

	const imageAttachments = attachments.filter(x => x.innerText == 'mockup_' + name + '.png')
	if (imageAttachments.length > 0) {
	const latestAttachment = imageAttachments[imageAttachments.length - 1]
	const href = latestAttachment.attributes['href']
	const attachmentId = href.substring(href.lastIndexOf('/') + 1, href.lastIndexOf('.'))

	attachmentUrl = attachmentsUrl + pageIdMatch[1] + '/' + attachmentId + '.png'
	}
	}
	}

	if (attachmentUrl) {
	customScript.set_content('')
	customScript.insertAdjacentHTML('beforebegin', '<img src="' + attachmentUrl + '" />')
	}
	}
	}

	///////////////////////////////////////
	// Fix double strong text (<strong>Some <strong>text</strong> example</strong>)

	const doubleStrong = html.querySelectorAll('strong strong')
	for (const strong of doubleStrong) {
	strong.insertAdjacentHTML('beforebegin', strong.innerHTML)
	strong.set_content('')
	}

	///////////////////////////////////////
	// Convert Attachments to list

	const attachmentsContainer = html.querySelector('#attachments')?.parentNode?.nextElementSibling
	if (attachmentsContainer) {
	attachmentsContainer.set_content(
	'<ul>' +
	attachmentsContainer.innerHTML.replaceAll('<img', '<li><img').replaceAll('<br>', '</li>') +
	'</ul>')
	}

	///////////////////////////////////////
	// Convert HTML to Markdown

	var markdown = turndownService.turndown(html.toString())

	///////////////////////////////////////
	// Trim lines at the end (we can't trim at the start since Markdown relies on indentation at the beginning)
	// Fix Quotations
	var lines = markdown.split('\n')
	for (let i = 0; i < lines.length; i++) {
	var line = lines[i]

	if (line.indexOf('{{tmp:blockquote}}') >= 0) {
	line = '> ' + line.replaceAll('{{tmp:blockquote}}', '')
	if (line.trim() == '>') {
	line = ''
	}
	}

	lines[i] = line.trimEnd()
	}
	markdown = lines.join('\n')

	///////////////////////////////////////
	// Remove excess empty lines (more than one empty line between text)

	var oldMarkdown = ''
	do {
	oldMarkdown = markdown
	markdown = markdown.replaceAll('\n\n\n', '\n\n')
	} while (oldMarkdown != markdown)

	///////////////////////////////////////
	// Remove empty lines between quotes (as that shows up as separate blockquote-items)

	lines = markdown.split('\n')
	for (let i = 0; i < lines.length; i++) {
	const previousLine = i > 0 ? lines[i - 1] : null
	const currentLine = lines[i]
	const nextLine = i < lines.length - 1 ? lines[i + 1] : null

	if (previousLine && previousLine.startsWith('> ') && nextLine && nextLine.startsWith('> ') && currentLine.trim() == '') {
	lines[i] = '>'
	}
	}
	markdown = lines.join('\n').trim();

	///////////////////////////////////////
	// Write markdown to disk

	await fs.mkdir(markdownFileDirectory, { recursive: true })
	await fs.writeFile(markdownFileFullPath, markdown)
	}

	///////////////////////////////////////
	// Update .order file (we currently only support the root level)

	const orderFilePath = path.join(markdownDirectory, '.order')
	var orderFileContent = ''
	for (const file of files) {
	if (file.parents.length == 0) {
	orderFileContent += file.markdownFileName.replace('.md', '') + '\n'
	}
	}

	await fs.writeFile(orderFilePath, orderFileContent)

	console.log('finished')
	})()
	{
	"name": "confluence-export",
	"version": "1.0.0",
	"description": "",
	"main": "index.js",
	"scripts": {
	"test": "echo \"Error: no test specified\" && exit 1",
	"start": "node index.js"
	},
	"author": "",
	"license": "ISC",
	"dependencies": {
	"node-html-parser": "^6.1.0",
	"sanitize-filename": "^1.6.3",
	"turndown-plugin-confluence-to-gfm": "^0.5.0"
	}
	}