Skip to content

Instantly share code, notes, and snippets.

@armornick
Created March 26, 2025 13:23
Show Gist options
  • Save armornick/5e5a052e05424ffeb0f49581a6c4ffb4 to your computer and use it in GitHub Desktop.
Save armornick/5e5a052e05424ffeb0f49581a6c4ffb4 to your computer and use it in GitHub Desktop.
Markdown to Docx converter
import { readFileSync, writeFileSync } from "node:fs";
import { marked } from "marked";
import { JSDOM } from "jsdom";
import {
AlignmentType, Document, ExternalHyperlink, HeadingLevel, LevelFormat,
Packer, Paragraph, ShadingType, Table, TableCell, TableRow, TextRun,
WidthType
} from "docx";
// Markdown source file that is read and converted.
const INPUT_FILE = 'sample.md';
// Word document that the conversion result is written to.
const OUTPUT_FILE = 'sample.docx';
// DOM nodeType values the converters compare against (element and text nodes).
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;
// Paragraph spacing applied to headings and regular paragraphs.
// NOTE(review): docx presumably interprets `before` in twentieths of a point — confirm.
const SPACING = {
before: 200,
}
// Solid background shading used for inline code/kbd runs and preformatted lines.
const SHADING = {
type: ShadingType.SOLID,
color: "dee2e6",
// fill: "212529",
}
// -----------------------------------------------------------------------
/**
 * Converts the inline content of an element into docx runs.
 *
 * The child nodes are walked recursively so that nested inline formatting is
 * applied only to the text it actually wraps (e.g. `<em>a <strong>b</strong></em>`
 * yields an italic run followed by a bold+italic run), and links or code spans
 * nested inside emphasis are preserved.
 *
 * @param {Element} element - element whose child nodes are converted
 * @param {{bold?: boolean, italics?: boolean, strike?: boolean}} [inherited]
 *   formatting inherited from enclosing inline elements (used by the recursion)
 * @returns {any[]} docx run objects (TextRun / ExternalHyperlink) in document order
 */
const convertChildNodes = (/** @type {Element} */element, inherited = {}) => {
  const children = [];

  for (const $node of element.childNodes) {
    if ($node.nodeType === TEXT_NODE) {
      children.push(new TextRun({ ...inherited, text: $node.textContent }));
      continue;
    }
    // comments and other node types carry no visible content
    if ($node.nodeType !== ELEMENT_NODE) {
      continue;
    }

    const tag = $node.tagName;
    if (tag === 'A') {
      children.push(new ExternalHyperlink({
        children: [
          new TextRun({
            ...inherited,
            text: $node.textContent,
            style: 'Hyperlink',
          }),
        ],
        link: $node.href,
      }));
    } else if (tag === 'KBD' || tag === 'CODE') {
      children.push(new TextRun({
        ...inherited,
        text: $node.textContent,
        shading: SHADING,
      }));
    } else if (tag === 'IMG') {
      console.log('images not currently supported');
    } else if (tag === 'BR') {
      // a hard line break stays inside the current paragraph
      children.push(new TextRun({ break: 1 }));
    } else {
      // formatting accumulates while descending, it is never switched off
      const format = { ...inherited };
      if (tag === 'B' || tag === 'STRONG') {
        format.bold = true;
      }
      if (tag === 'I' || tag === 'EM') {
        format.italics = true;
      }
      if (tag === 'S' || tag === 'DEL') {
        format.strike = true;
      }
      children.push(...convertChildNodes($node, format));
    }
  }

  return children;
};
/**
 * Appends one bulleted paragraph per list item to `children`.
 *
 * Only the direct `<li>` children of `$list` are visited. For each item the
 * item's own content is emitted first (nested lists excluded), followed by
 * any nested lists one level deeper. Visiting only direct children prevents
 * nested items from being emitted twice and keeps the text of items that own
 * a sublist.
 *
 * @param {any[]} children - output array the paragraphs are appended to
 * @param {HTMLUListElement} $list - list element to convert
 * @param {number} [level=0] - nesting depth used as the bullet level
 */
const convertUnorderedList = (children, $list, level = 0) => {
  const isList = ($el) => $el.tagName === 'UL' || $el.tagName === 'OL';

  for (const $item of $list.children) {
    if ($item.tagName !== 'LI') {
      continue;
    }

    // Work on a copy without the nested lists so their text does not leak
    // into this item's paragraph; the original DOM stays untouched.
    const $own = $item.cloneNode(true);
    for (const $nested of [...$own.children].filter(isList)) {
      $nested.remove();
    }

    children.push(new Paragraph({
      children: convertChildNodes($own),
      bullet: { level },
    }));

    for (const $nested of $item.children) {
      if ($nested.tagName === 'UL') {
        convertUnorderedList(children, $nested, level + 1);
      } else if ($nested.tagName === 'OL') {
        convertOrderedList(children, $nested, level + 1);
      }
    }
  }
};
/**
 * Appends one numbered paragraph per list item to `children`.
 *
 * Only the direct `<li>` children of `$list` are visited. For each item the
 * item's own content is emitted first (nested lists excluded), followed by
 * any nested lists one level deeper. Visiting only direct children prevents
 * nested items from being emitted twice and keeps the text of items that own
 * a sublist.
 *
 * @param {any[]} children - output array the paragraphs are appended to
 * @param {HTMLOListElement} $list - list element to convert
 * @param {number} [level=0] - nesting depth used as the numbering level
 */
const convertOrderedList = (children, $list, level = 0) => {
  const isList = ($el) => $el.tagName === 'UL' || $el.tagName === 'OL';

  for (const $item of $list.children) {
    if ($item.tagName !== 'LI') {
      continue;
    }

    // Work on a copy without the nested lists so their text does not leak
    // into this item's paragraph; the original DOM stays untouched.
    const $own = $item.cloneNode(true);
    for (const $nested of [...$own.children].filter(isList)) {
      $nested.remove();
    }

    children.push(new Paragraph({
      children: convertChildNodes($own),
      numbering: { reference: 'numbered-list', level },
    }));

    for (const $nested of $item.children) {
      if ($nested.tagName === 'OL') {
        convertOrderedList(children, $nested, level + 1);
      } else if ($nested.tagName === 'UL') {
        convertUnorderedList(children, $nested, level + 1);
      }
    }
  }
};
/**
 * Converts the cells of an HTML table row into docx table cells.
 *
 * Each cell becomes a single paragraph holding the cell's text content. The
 * cell's `align` attribute (`left`, `center` or `right`) is mapped to the
 * matching paragraph alignment; any other or missing value leaves the
 * alignment undefined.
 *
 * @param {HTMLTableRowElement} $row - row whose cells are converted
 * @returns {any[]} one docx TableCell per HTML cell, in order
 */
const convertTableRow = ($row) => {
  const alignments = new Map([
    ['left', AlignmentType.LEFT],
    ['center', AlignmentType.CENTER],
    ['right', AlignmentType.RIGHT],
  ]);

  return Array.from($row.cells, ($cell) => new TableCell({
    children: [new Paragraph({
      text: $cell.textContent,
      // Map#get yields undefined for a missing or unknown align value
      alignment: alignments.get($cell.getAttribute('align')),
    })],
  }));
};
/**
 * Converts an HTML table into a full-width docx table and appends it, plus a
 * trailing empty paragraph, to `children`.
 *
 * A table head consisting of exactly one row is flagged as the table header
 * row; head sections with any other number of rows are emitted as ordinary
 * rows. Body rows of every `<tbody>` follow in document order.
 *
 * @param {any[]} children - output array the table is appended to
 * @param {HTMLTableElement} $table - table element to convert
 */
const convertTable = (children, $table) => {
  const toRow = ($row, extra = {}) => new TableRow({
    children: convertTableRow($row),
    ...extra,
  });

  const headRows = [];
  const $head = $table.tHead;
  if ($head) {
    const $headRows = $head.rows;
    if ($headRows.length === 1) {
      headRows.push(toRow($headRows.item(0), { tableHeader: true }));
    } else {
      headRows.push(...Array.from($headRows, ($row) => toRow($row)));
    }
  }

  const bodyRows = Array.from($table.tBodies).flatMap(
    ($tbody) => Array.from($tbody.rows, ($row) => toRow($row)),
  );

  children.push(new Table({
    rows: [...headRows, ...bodyRows],
    width: {
      size: 100,
      type: WidthType.PERCENTAGE,
    },
  }));

  // an empty paragraph keeps consecutive tables from being merged
  children.push(new Paragraph(''));
};
/**
 * Converts a preformatted block into one shaded paragraph per line of text,
 * followed by an empty unshaded paragraph.
 *
 * @param {any[]} children - output array the paragraphs are appended to
 * @param {HTMLPreElement} element - `<pre>` element to convert
 */
const convertPreElement = (children, element) => {
  const lines = (element.textContent ?? '').split('\n');

  // Content that ends with a newline splits into a trailing empty string;
  // drop it only in that case so a real last line is never lost.
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  for (const line of lines) {
    children.push(new Paragraph({
      text: line,
      shading: SHADING,
    }));
  }

  // add an empty paragraph to stop it from merging consecutive lines
  children.push(new Paragraph(''));
};
/**
 * Converts one block-level node and appends the resulting docx objects to
 * `children`.
 *
 * Whitespace-only text nodes are ignored. Headings become heading paragraphs
 * built from their text content, paragraphs keep their inline runs, and
 * lists, tables and preformatted blocks are delegated to their converters.
 * `<div>` and `<blockquote>` are transparent containers whose child nodes are
 * converted in place. Anything else is reported on the console.
 *
 * @param {any[]} children - output array the converted objects are appended to
 * @param {Element} element - node to convert
 */
const convertElement = (children, element) => {
  const isBlankText = element.nodeType === TEXT_NODE
    && element.textContent.trim() === '';
  if (isBlankText) {
    return;
  }

  const tag = element.tagName;
  switch (tag) {
    case 'H1':
    case 'H2':
    case 'H3':
    case 'H4':
    case 'H5':
    case 'H6': {
      // "H3" -> HeadingLevel.HEADING_3
      const heading = HeadingLevel[`HEADING_${tag.slice(1)}`];
      children.push(new Paragraph({
        text: element.textContent,
        heading,
        spacing: SPACING,
      }));
      break;
    }
    case 'P':
      children.push(new Paragraph({
        children: convertChildNodes(element),
        spacing: SPACING,
      }));
      break;
    case 'UL':
      convertUnorderedList(children, element);
      break;
    case 'OL':
      convertOrderedList(children, element);
      break;
    case 'TABLE':
      convertTable(children, element);
      break;
    case 'PRE':
      convertPreElement(children, element);
      break;
    case 'HR':
      // horizontal rule: an empty paragraph with a bottom border
      children.push(new Paragraph({
        text: '',
        border: {
          bottom: {
            color: 'auto',
            style: 'single',
          },
        },
      }));
      // an empty paragraph keeps the rule from merging with what follows
      children.push(new Paragraph(''));
      break;
    case 'DIV':
    case 'BLOCKQUOTE':
      Array.from(element.childNodes).forEach(($child) => {
        convertElement(children, $child);
      });
      break;
    default:
      console.log(`unrecognized tag: ${tag} (${element})`);
  }
};
/**
 * Builds the docx document for a parsed HTML fragment.
 *
 * Every child element of the fragment is converted with `convertElement`, and
 * the results form the single section of the document. The document also
 * declares the `numbered-list` numbering definition that ordered lists refer
 * to: four levels (0-3), each indented a further 300 units.
 *
 * @param {DocumentFragment} element - fragment holding the rendered HTML
 * @returns {Document} the assembled docx document
 */
const convertFragToDoc = (/** @type {DocumentFragment} */element) => {
  const children = [];
  for (const $el of element.children) {
    convertElement(children, $el);
  }

  const levelCount = 4;
  const indentStep = 300;
  const levels = Array.from({ length: levelCount }, (_, level) => ({
    level,
    format: LevelFormat.DECIMAL_ENCLOSED_FULLSTOP,
    // The placeholder is 1-based: `%1` is the counter of level 0, `%2` the
    // counter of level 1, and so on. Each level must show its own counter,
    // otherwise nested items repeat the number of their parent.
    text: `%${level + 1}`,
    alignment: AlignmentType.START,
    style: {
      paragraph: {
        indent: { left: indentStep * (level + 1) },
      },
    },
  }));

  return new Document({
    numbering: {
      config: [
        {
          reference: 'numbered-list',
          levels,
        },
      ],
    },
    sections: [
      {
        properties: {},
        children,
      },
    ],
  });
};
// -----------------------------------------------------------------------
/**
 * Script entry point: reads INPUT_FILE, renders the Markdown to HTML, converts
 * the HTML to a docx document and writes it to OUTPUT_FILE.
 *
 * The intermediate HTML is additionally written to `sample.html`.
 *
 * @returns {Promise<void>} settles once the output file has been written
 */
const main = async () => {
  console.log(`reading ${INPUT_FILE}`);
  const src = readFileSync(INPUT_FILE, 'utf-8');

  // console.log(`processing input`);
  const html = marked(src);
  writeFileSync('sample.html', html, 'utf-8');

  const fragment = JSDOM.fragment(html);
  const document = convertFragToDoc(fragment);

  console.log(`writing ${OUTPUT_FILE}`);
  const buff = await Packer.toBuffer(document);
  writeFileSync(OUTPUT_FILE, buff);
};

// Report failures explicitly instead of leaving the promise unhandled, and
// signal the failure to the calling shell through the exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment