Compare commits

..

No commits in common. "master" and "v0.1.21" have entirely different histories.

14 changed files with 203 additions and 286 deletions

View File

@ -1,36 +0,0 @@
# Workflow that compiles the package and publishes it to the doc-utils npm
# registry. It only runs when triggered manually (workflow_dispatch).
name: Build and publish
on:
- workflow_dispatch
# Publishing automatically on pushes to master is currently switched off:
# push:
# branches:
# - master
jobs:
build-and-publish:
runs-on: ubuntu-latest
env:
# Registry token, read by the "Login to package registry" step below
NPM_PUBLISH_TOKEN: ${{ secrets.NPM_TOKEN }}
steps:
- name: Check out the repo
uses: actions/checkout@v3
- name: Use Node.js 20
uses: actions/setup-node@v3
with:
node-version: 20
# Maps the @doc-utils scope to the Gitea registry and stores the auth token for that host.
# NOTE(review): package.json's publishConfig.registry names gitea.home.jbrumond.me, while
# the token here is stored for gitea.jbrumond.me — confirm `npm publish` authenticates
# against the intended host.
- name: Login to package registry
run: |
npm config set @doc-utils:registry https://gitea.jbrumond.me/api/packages/doc-utils/npm/
npm config set -- '//gitea.jbrumond.me/api/packages/doc-utils/npm/:_authToken' "$NPM_PUBLISH_TOKEN"
- name: Install dependencies
run: npm ci
- name: Compile TypeScript
run: npm run tsc
- name: Publish package
run: npm publish

View File

@ -1,40 +0,0 @@
# Workflow that installs dependencies and compiles the package on every push
# and pull request targeting master, once per Node.js version in the matrix.
name: Build and test
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
jobs:
  build-and-test:
    runs-on: ubuntu-latest
    # The registry login step below reads $NPM_PUBLISH_TOKEN; without this
    # mapping the variable is unset and an empty auth token gets written.
    env:
      NPM_PUBLISH_TOKEN: ${{ secrets.NPM_TOKEN }}
    strategy:
      matrix:
        node-version: [18.x, 20.x]
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3
      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v3
        with:
          node-version: ${{ matrix.node-version }}
      # Maps the @doc-utils scope to the Gitea registry and stores the auth token for that host
      - name: Login to package registry
        run: |
          npm config set @doc-utils:registry https://gitea.jbrumond.me/api/packages/doc-utils/npm/
          npm config set -- '//gitea.jbrumond.me/api/packages/doc-utils/npm/:_authToken' "$NPM_PUBLISH_TOKEN"
      - name: Install dependencies
        run: npm ci
      - name: Compile TypeScript
        run: npm run tsc
      # todo: tests (placeholder step that always succeeds)
      - name: Run tests
        run: exit 0

34
package-lock.json generated
View File

@ -1,18 +1,18 @@
{
"name": "@doc-utils/markdown2html",
"version": "0.3.6",
"version": "0.1.21",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@doc-utils/markdown2html",
"version": "0.3.6",
"version": "0.1.21",
"dependencies": {
"bytefield-svg": "^1.6.1",
"dompurify": "^2.3.6",
"jsdom": "^20.0.1",
"katex": "^0.16.7",
"marked": "^5.0.2",
"marked": "^4.1.1",
"nomnoml": "^1.5.2",
"pikchr": "^0.0.5",
"prismjs": "^1.29.0",
@ -28,7 +28,7 @@
"@types/jsdom": "^20.0.0",
"@types/katex": "^0.16.0",
"@types/luxon": "^3.1.0",
"@types/marked": "^5.0.0",
"@types/marked": "^4.0.3",
"@types/node": "^18.11.18",
"@types/prismjs": "^1.26.0",
"@types/qrcode": "^1.5.0",
@ -86,9 +86,9 @@
"dev": true
},
"node_modules/@types/marked": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/@types/marked/-/marked-5.0.0.tgz",
"integrity": "sha512-YcZe50jhltsCq7rc9MNZC/4QB/OnA2Pd6hrOSTOFajtabN+38slqgDDCeE/0F83SjkKBQcsZUj7VLWR0H5cKRA==",
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/@types/marked/-/marked-4.3.0.tgz",
"integrity": "sha512-zK4gSFMjgslsv5Lyvr3O1yCjgmnE4pr8jbG8qVn4QglMwtpvPCf4YT2Wma7Nk95OxUUJI8Z+kzdXohbM7mVpGw==",
"dev": true
},
"node_modules/@types/node": {
@ -954,14 +954,14 @@
"integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA=="
},
"node_modules/marked": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/marked/-/marked-5.0.2.tgz",
"integrity": "sha512-TXksm9GwqXCRNbFUZmMtqNLvy3K2cQHuWmyBDLOrY1e6i9UvZpOTJXoz7fBjYkJkaUFzV9hBFxMuZSyQt8R6KQ==",
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz",
"integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==",
"bin": {
"marked": "bin/marked.js"
},
"engines": {
"node": ">= 18"
"node": ">= 12"
}
},
"node_modules/mime-db": {
@ -2015,9 +2015,9 @@
"dev": true
},
"@types/marked": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/@types/marked/-/marked-5.0.0.tgz",
"integrity": "sha512-YcZe50jhltsCq7rc9MNZC/4QB/OnA2Pd6hrOSTOFajtabN+38slqgDDCeE/0F83SjkKBQcsZUj7VLWR0H5cKRA==",
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/@types/marked/-/marked-4.3.0.tgz",
"integrity": "sha512-zK4gSFMjgslsv5Lyvr3O1yCjgmnE4pr8jbG8qVn4QglMwtpvPCf4YT2Wma7Nk95OxUUJI8Z+kzdXohbM7mVpGw==",
"dev": true
},
"@types/node": {
@ -2651,9 +2651,9 @@
"integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA=="
},
"marked": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/marked/-/marked-5.0.2.tgz",
"integrity": "sha512-TXksm9GwqXCRNbFUZmMtqNLvy3K2cQHuWmyBDLOrY1e6i9UvZpOTJXoz7fBjYkJkaUFzV9hBFxMuZSyQt8R6KQ=="
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz",
"integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A=="
},
"mime-db": {
"version": "1.52.0",

View File

@ -1,6 +1,6 @@
{
"name": "@doc-utils/markdown2html",
"version": "0.3.6",
"version": "0.1.21",
"publishConfig": {
"registry": "https://gitea.home.jbrumond.me/api/packages/doc-utils/npm/"
},
@ -17,7 +17,7 @@
"@types/jsdom": "^20.0.0",
"@types/katex": "^0.16.0",
"@types/luxon": "^3.1.0",
"@types/marked": "^5.0.0",
"@types/marked": "^4.0.3",
"@types/node": "^18.11.18",
"@types/prismjs": "^1.26.0",
"@types/qrcode": "^1.5.0",
@ -28,7 +28,7 @@
"dompurify": "^2.3.6",
"jsdom": "^20.0.1",
"katex": "^0.16.7",
"marked": "^5.0.2",
"marked": "^4.1.1",
"nomnoml": "^1.5.2",
"pikchr": "^0.0.5",
"prismjs": "^1.29.0",

View File

@ -1,21 +0,0 @@
import { marked } from 'marked';
import { MarkdownOptions } from './render';
/**
 * Stand-in origin prepended to a base URL that does not start with `http://`
 * or `https://`, so that it can be fed to the `URL` constructor. It is stripped
 * again from the resolved href before the token is updated.
 */
export const placeholder_base_url = 'https://markdown2html.base-url.placeholder.invalid';

/**
 * `walkTokens` helper that resolves the `href` of link and image tokens
 * against `options.base_url`. Tokens of any other type, and all tokens when no
 * base URL is configured, are left untouched.
 *
 * An href (or base URL) that the `URL` constructor rejects is left as written
 * instead of throwing, so a single malformed link cannot abort the render.
 *
 * @param token Token to inspect; its `href` is rewritten in place.
 * @param options Markdown options; only `base_url` is read.
 */
export function base_url_walk_tokens(token: marked.Token, options: MarkdownOptions) {
	if (! options.base_url) {
		return;
	}

	if (token.type !== 'link' && token.type !== 'image') {
		return;
	}

	const is_absolute = options.base_url.startsWith('http://') || options.base_url.startsWith('https://');
	const base_url = is_absolute ? options.base_url : placeholder_base_url + options.base_url;

	let resolved: string;

	try {
		resolved = (new URL(token.href, base_url)).toString();
	}

	catch {
		// `new URL` throws a TypeError for unparsable input; keep the original href
		return;
	}

	// Drop the stand-in origin again so path-only base URLs yield path-only hrefs
	token.href = resolved.startsWith(placeholder_base_url)
		? resolved.slice(placeholder_base_url.length)
		: resolved;
}

64
src/breadcrumb-nav.ts Normal file
View File

@ -0,0 +1,64 @@
import { marked } from 'marked';
import { ParsedAttributes, parse_attributes } from './attrs';
import { MarkdownOptions } from './render';
// todo: deprecate this
/**
 * Token emitted by the `breadcrumb_nav` block extension for one `///`-fenced
 * breadcrumb block.
 */
export interface BreadcrumbNavToken extends marked.Tokens.Generic {
/** Raw text found between the opening and closing fences. */
text: string;
/** Result of running `parse_attributes` on the text that follows the opening fence. */
attrs: ParsedAttributes;
/** One entry per breadcrumb line, each holding that line's inline tokens. */
items: marked.Token[][];
}
/**
 * Creates the marked extension that tokenizes and renders `///`-fenced
 * breadcrumb navigation blocks.
 *
 * A block opens with three or more slashes, optionally followed on the same
 * line by an attribute string, and closes with a fence of the same length.
 * Every non-blank line between the fences is lexed as inline markdown and
 * becomes one breadcrumb entry.
 *
 * @param renderer Renderer handed to `parseInline` when rendering each entry.
 * @param opts Markdown options; not read by this extension.
 * @returns A combined tokenizer/renderer extension named `breadcrumb_nav`.
 */
export function breadcrumb_nav_ext(renderer: marked.Renderer, opts: MarkdownOptions) : marked.TokenizerExtension & marked.RendererExtension {
	return {
		name: 'breadcrumb_nav',
		level: 'block',
		// The pattern is anchored without the `m` flag, so a start index is only
		// reported when the remaining source itself begins with a fence
		start: (src) => src.match(/^\/\/\//)?.index,
		tokenizer(src, tokens) {
			// Groups: (1) extra fence slashes, (2) attribute string, (3) body up to the matching closing fence.
			// NOTE(review): with a longer fence (e.g. `////`) the body alternation appears able to
			// consume `//` in more than one way, so unterminated input may backtrack heavily —
			// worth confirming and tightening.
			const rule = /^\/\/\/(\/*)([^\n]+)?(?:\n)((?:[^\/]|\/\/?(?!\/\1))+)\/\/\/\1/;
			const match = rule.exec(src);

			if (! match) {
				return undefined;
			}

			const token: BreadcrumbNavToken = {
				type: 'breadcrumb_nav',
				raw: match[0],
				text: match[3],
				attrs: parse_attributes(match[2] || ''),
				tokens: [ ],
				items: [ ],
			};

			for (const line of match[3].trim().split('\n')) {
				// A blank line carries no breadcrumb; skipping it avoids rendering an
				// empty <li> that contains nothing but a separator
				if (! line.trim()) {
					continue;
				}

				const item_tokens = this.lexer.inlineTokens(line, [ ]);

				token.tokens.push(...item_tokens);
				token.items.push(item_tokens);
			}

			return token;
		},
		renderer(token: BreadcrumbNavToken) {
			const list_items = token.items.map((item_tokens, index) => {
				let item = '<li>\n';

				// Every entry except the first is preceded by a decorative separator
				if (index) {
					item += '\t\t\t<span class="separator" aria-hidden="true">/</span>\n';
				}

				item += `\t\t\t${this.parser.parseInline(item_tokens, renderer)}\n`;
				return item + '\t\t</li>';
			});

			return `<nav aria-label="breadcrumbs" ${token.attrs.html_attrs.join(' ')}>\n`
				+ `\t<ol>\n`
				+ '\t\t'
				+ list_items.join('\n\t\t')
				+ '\n'
				+ `\t</ol>\n`
				+ `</nav>`;
		}
	};
}

View File

@ -3,19 +3,14 @@ import { marked } from 'marked';
import { MarkdownOptions } from './render';
export interface DescriptionListToken extends marked.Tokens.Generic {
type: 'description_list';
items: (DescriptionTermToken | DescriptionDetailToken)[];
}
export type DescriptionElemToken = DescriptionTermToken | DescriptionDetailToken;
export interface DescriptionTermToken extends marked.Tokens.Generic {
type: 'description_term';
text: string;
}
export interface DescriptionDetailToken extends marked.Tokens.Generic {
type: 'description_detail';
text: string;
}
@ -23,106 +18,72 @@ export function description_list_ext(renderer: marked.Renderer, opts: MarkdownOp
return {
name: 'description_list',
level: 'block',
start: (src) => src.match(/^: /)?.index,
start: (src) => src.match(/^:[:#-]/)?.index,
tokenizer(src, tokens) {
const start = src.match(/^: /)?.index;
const lines = src.slice(start).split(/\n/g);
const token: DescriptionListToken = {
type: 'description_list',
raw: '',
items: [ ]
};
const rule = /^(?::[:#-](?:\s[^\n]*)?(?:\n|$))+/;
const match = rule.exec(src);
let current: DescriptionElemToken;
const render_current = () => {
if (current) {
this.lexer.blockTokens(current.text, current.tokens);
current = null;
}
};
if (match) {
const token: DescriptionListToken = {
type: 'description_list',
raw: match[0],
items: [ ]
};
for (const line of lines) {
// Skip empty lines
if (! line.trim()) {
token.raw += line + '\n';
const items = token.raw.trim().split('\n');
const raw_buffer: string[] = [ ];
const text_buffer: string[] = [ ];
if (current) {
current.raw += line + '\n';
current.text += '\n';
const flush_buffer = () => {
if (! raw_buffer.length) {
return;
}
continue;
}
// Grab the second character from the first line to determine the
// token type (should be "#" or "-")
const type = raw_buffer[0][1] === '#' ? 'description_term' : 'description_detail';
const sub_token: (DescriptionTermToken | DescriptionDetailToken) = {
type,
raw: raw_buffer.join('\n'),
text: text_buffer.join('\n'),
tokens: [ ],
};
// If the line starts immediately with a colon, it is a <dt>
if (line.startsWith(': ')) {
render_current();
token.raw += line + '\n';
token.items.push(
current = {
type: 'description_term',
raw: line,
text: line.slice(2),
tokens: [ ],
raw_buffer.length = 0;
text_buffer.length = 0;
this.lexer.blockTokens(sub_token.text, sub_token.tokens);
token.items.push(sub_token);
};
for (const line of items) {
const rule = /^:([:#-])(?:\s([^\n]*))?(?:\n|$)/;
const match = rule.exec(line);
if (match) {
if (match[1] !== ':') {
flush_buffer();
}
);
continue;
}
// If the line starts with a colon after an indent, it is a <dd>
if (line.startsWith(' : ')) {
render_current();
token.raw += line + '\n';
token.items.push(
current = {
type: 'description_detail',
raw: line,
text: line.slice(4),
tokens: [ ],
}
);
continue;
}
// If the line starts with (at least) two indents, it is a child
// of the current element
if (line.startsWith(' ')) {
token.raw += line + '\n';
current.raw += '\n' + line;
current.text += '\n' + line.slice(current.type === 'description_term' ? 2 : 4);
continue;
}
// If the line starts with one indent, it is a child of the current
// <dt> (but is not allowed after a <dd>)
if (line.startsWith(' ')) {
if (current.type !== 'description_term') {
render_current();
break;
raw_buffer.push(match[0]);
text_buffer.push(match[2]);
}
token.raw += line + '\n';
current.raw += '\n' + line;
current.text += '\n' + line.slice(2);
continue;
}
// If the line starts any other way, it is the start of new content
// and we are done parsing
render_current();
break;
}
render_current();
if (token.items.length) {
flush_buffer();
return token;
}
},
renderer(token: DescriptionListToken) {
const items = token.items.map((item) => {
const tag = item.type === 'description_term' ? 'dt' : 'dd';
return `<${tag}>${this.parser.parse(item.tokens)}</${tag}>`;
return `
<${tag}>
${this.parser.parse(item.tokens)}
</${tag}>
`;
});
return `<dl>${items.join('')}</dl>`;

View File

@ -1,33 +0,0 @@
import type { marked } from 'marked';
import { MarkdownOptions } from './render';
/**
 * Token emitted by the inline `embed` extension for one `{& type: href &}` span.
 */
export interface EmbedToken extends marked.Tokens.Generic {
/** Text captured before the colon; rendered as the `type` attribute. */
media_type: string;
/** Text captured after the colon; rendered as the `src` attribute. */
href: string;
}
/**
 * Creates the marked extension for inline embeds written as
 * `{& <media-type>: <href> &}`, rendered as an `<embed>` element.
 *
 * @param renderer Renderer instance; not read by this extension.
 * @param opts Markdown options; not read by this extension.
 * @returns A combined tokenizer/renderer extension named `embed`.
 */
export function embed_ext(renderer: marked.Renderer, opts: MarkdownOptions) : marked.TokenizerExtension & marked.RendererExtension {
	// Makes a value safe inside a double-quoted attribute. Ampersands that already
	// begin a character reference are left alone so pre-escaped input keeps its meaning.
	const escape_attr = (value: string) => {
		return value
			.replace(/&(?!(?:#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g, '&amp;')
			.replace(/"/g, '&quot;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;');
	};

	return {
		name: 'embed',
		level: 'inline',
		start: (src) => src.match(/\{&/)?.index,
		tokenizer(src, tokens) {
			// The media type may not contain a colon; otherwise `type:https://…` (no space
			// after the separator) would be split at the URL's own colon instead
			const rule = /^\{&\s+([^\s:]+):\s*([^\s]+)\s+&\}/;
			const match = rule.exec(src);

			if (! match) {
				return undefined;
			}

			return {
				type: 'embed',
				raw: match[0],
				media_type: match[1],
				href: match[2],
				tokens: [ ]
			};
		},
		renderer(token: EmbedToken) {
			return `<embed type="${escape_attr(token.media_type)}" src="${escape_attr(token.href)}"></embed>`;
		}
	};
}

View File

@ -23,7 +23,7 @@ export function footnote_ref_ext(renderer: marked.Renderer, opts: MarkdownOption
level: 'inline',
start: (src) => src.match(/\[\^/)?.index,
tokenizer(src, tokens) {
const rule = /^\[\^([a-zA-Z0-9-\._, §]+)]/;
const rule = /^\[\^([a-zA-Z0-9-._§]+)]/;
const match = rule.exec(src);
if (match) {
@ -68,7 +68,7 @@ export function footnote_list_ext(renderer: marked.Renderer, opts: MarkdownOptio
};
let remaining = src;
const prefix_rule = /^\[\^([a-zA-Z0-9\., _§-]+)]:/;
const prefix_rule = /^\[\^([a-zA-Z0-9\._§-]+)]:/;
const whitespace_rule = /^\s*(?:\n|$)/;
if (! prefix_rule.test(src)) {

View File

@ -9,6 +9,5 @@ export function sanitize_html(html: string, custom_elements?: CustomElementHandl
const dom_purify = createDOMPurify(window as any as Window);
return dom_purify.sanitize(html, {
CUSTOM_ELEMENT_HANDLING: custom_elements,
ADD_TAGS: [ 'embed', 'object', 'video', 'audio', 'iframe' ],
});
}

View File

@ -1,4 +0,0 @@
import { languages } from 'prismjs';
// Replaces the pattern of the `request-line` token in Prism's `http` grammar with one that
// accepts the listed request methods followed by an absolute-path or http(s) target and an
// HTTP version. Presumably this exists to recognise methods missing from the stock pattern
// (e.g. QUERY) — confirm against the bundled Prism version.
languages.http['request-line'].pattern = /^(?:CONNECT|DELETE|GET|HEAD|QUERY|OPTIONS|PATCH|POST|PRI|PUT|SEARCH|TRACE)\s(?:https?:\/\/|\/)\S*\sHTTP\/[\d.]+/m;

View File

@ -4,7 +4,6 @@ import load_languages = require('prismjs/components/index');
load_languages();
require('./wasm');
require('./http');
// hooks.add('after-tokenize', (env) => {
// //

View File

@ -9,8 +9,7 @@ import { katex_block_ext, katex_inline_ext } from './katex';
import { footnote_list_ext, footnote_ref_ext } from './footnotes';
import { description_list_ext } from './description-list';
import { resolve_async_bindings } from './async-steps';
import { base_url_walk_tokens } from './base-url';
import { embed_ext } from './embed';
import { breadcrumb_nav_ext } from './breadcrumb-nav';
export interface MarkdownOptions {
base_url?: string;
@ -26,36 +25,13 @@ export interface MarkdownExtension {
}
export async function render_markdown_to_html(markdown: string, options: MarkdownOptions = { }) {
const marked_options = marked_opts(options);
setup_marked(options, marked_options);
const unsafe_html = options.inline
? marked.parseInline(markdown, marked_options)
: marked.parse(markdown, marked_options);
return sanitize_html(await resolve_async_bindings(unsafe_html), options.custom_elements);
}
export function render_markdown_to_html_inline_sync(markdown: string, options: MarkdownOptions = { }) {
const marked_options = marked_opts(options);
setup_marked(options, marked_options);
const unsafe_html = marked.parseInline(markdown, marked_options);
return sanitize_html(unsafe_html, options.custom_elements);
}
function marked_opts<T extends boolean>(options: MarkdownOptions) : marked.MarkedOptions {
return {
const marked_options: marked.MarkedOptions = {
baseUrl: options.base_url,
breaks: options.breaks || false,
renderer: create_renderer(options),
mangle: false,
headerIds: false,
};
}
function setup_marked(options: MarkdownOptions, marked_options: marked.MarkedOptions) {
marked.use({
walkTokens(token) {
base_url_walk_tokens(token, options);
},
extensions: [
katex_block_ext(marked_options.renderer, options),
katex_inline_ext(marked_options.renderer, options),
@ -65,7 +41,7 @@ function setup_marked(options: MarkdownOptions, marked_options: marked.MarkedOpt
description_list_ext(marked_options.renderer, options),
section_ext(marked_options.renderer, options),
icon_ext(marked_options.renderer, options),
embed_ext(marked_options.renderer, options),
breadcrumb_nav_ext(marked_options.renderer, options),
...(options.extensions || [ ]).map((ext) => {
return ext(marked_options.renderer, options);
}),
@ -78,4 +54,53 @@ function setup_marked(options: MarkdownOptions, marked_options: marked.MarkedOpt
}
},
});
const unsafe_html = options.inline
? marked.parseInline(markdown, marked_options)
: await new Promise<string>((resolve, reject) => {
marked.parse(markdown, marked_options, (error, unsafe_html) => {
if (error) {
return reject(error);
}
resolve_async_bindings(unsafe_html).then(resolve, reject);
});
});
return sanitize_html(unsafe_html, options.custom_elements);
}
export function render_markdown_to_html_inline_sync(markdown: string, options: MarkdownOptions = { }) {
const marked_options: marked.MarkedOptions = {
baseUrl: options.base_url,
breaks: options.breaks || false,
renderer: create_renderer(options),
};
marked.use({
extensions: [
katex_block_ext(marked_options.renderer, options),
katex_inline_ext(marked_options.renderer, options),
footnote_ref_ext(marked_options.renderer, options),
footnote_list_ext(marked_options.renderer, options),
mark_ext(marked_options.renderer, options),
description_list_ext(marked_options.renderer, options),
section_ext(marked_options.renderer, options),
icon_ext(marked_options.renderer, options),
breadcrumb_nav_ext(marked_options.renderer, options),
...(options.extensions || [ ]).map((ext) => {
return ext(marked_options.renderer, options);
}),
],
tokenizer: {
url(src) {
// disable auto-linking; more can be added here to auto-link only sometimes
// see: https://github.com/markedjs/marked/issues/882#issuecomment-781628889
return null;
}
},
});
const unsafe_html = marked.parseInline(markdown, marked_options);
return sanitize_html(unsafe_html, options.custom_elements);
}

View File

@ -28,16 +28,23 @@ export function create_renderer(opts: MarkdownOptions) {
}
function heading(renderer: marked.Renderer, opts: MarkdownOptions) {
return function(orig_text: string, level: 1 | 2 | 3 | 4 | 5 | 6, raw: string) {
let { text, id, html_attrs } = parse_attributes(orig_text);
if (id) {
text += `\n<a class="heading-anchor" href="#${id}">`
+ `\n\t\t${icons.link}`
+ `\n\t\t<span style="display: none">Section titled ${text}</span>`
+ `\n\t</a>`;
return function(orig_text: string, level: 1 | 2 | 3 | 4 | 5 | 6, raw: string, slugger) {
let { text, id, html_attrs } = parse_attributes(raw);
if (! id) {
id = slugger.slug(text);
html_attrs.push(`id="${id}"`);
}
return `\n<h${level} ${html_attrs.join(' ')}>\n\t${text}\n</h${level}>\n`;
return `
<h${level} ${html_attrs.join(' ')}>
${text}
<a class="heading-anchor" href="#${id}">
${icons.link}
<span style="display: none">Section titled ${text}</span>
</a>
</h${level}>
`;
};
}
@ -152,10 +159,6 @@ function code(renderer: marked.Renderer, opts: MarkdownOptions) {
const binding = bind_data_async(promise);
return figure(binding);
};
case 'yaml:calendar': {
// todo
};
default:
return figure(`<pre class="language-${args[0] || 'txt'}">${render_prism(code, args[0])}</pre>`);