Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐍 Add parser support for additional markup features #1423

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
31 changes: 31 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 18 additions & 5 deletions packages/markdown-it-myst/src/directives.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ function replaceFences(state: StateCore): boolean {
token.info = match[1].trim();
token.meta = { arg: match[2] };
}
if (!match && token.markup.startsWith(':::')) {
token.type = 'directive';
token.meta = { options: { class: token.info?.trim() } };
token.info = 'div';
}
}
}
return true;
Expand All @@ -49,6 +54,7 @@ function runDirectives(state: StateCore): boolean {
token.content.trim() ? token.content.split(/\r?\n/) : [],
info,
state,
token.meta.options,
);
const { body, options } = content;
let { bodyOffset } = content;
Expand Down Expand Up @@ -106,6 +112,7 @@ function parseDirectiveContent(
content: string[],
info: string,
state: StateCore,
optionsIn: Record<string, any> = {},
): {
body: string[];
bodyOffset: number;
Expand Down Expand Up @@ -136,7 +143,11 @@ function parseDirectiveContent(
try {
const options = yaml.load(yamlBlock.join('\n')) as Record<string, any>;
if (options && typeof options === 'object') {
return { body: newContent, options: Object.entries(options), bodyOffset };
return {
body: newContent,
options: Object.entries({ ...optionsIn, ...options }),
bodyOffset,
};
}
} catch (err) {
stateWarn(
Expand All @@ -145,7 +156,7 @@ function parseDirectiveContent(
);
}
} else if (content.length && COLON_OPTION_REGEX.exec(content[0])) {
const options: [string, string][] = [];
const options: Record<string, any> = {};
let foundDivider = false;
for (const line of content) {
if (!foundDivider && !COLON_OPTION_REGEX.exec(line)) {
Expand All @@ -158,13 +169,15 @@ function parseDirectiveContent(
} else {
const match = COLON_OPTION_REGEX.exec(line);
const { option, value } = match?.groups ?? {};
if (option) options.push([option, value || 'true']);
if (option) {
options[option] = value || 'true';
}
bodyOffset++;
}
}
return { body: newContent, options, bodyOffset };
return { body: newContent, options: Object.entries({ ...optionsIn, ...options }), bodyOffset };
}
return { body: content, bodyOffset: 1 };
return { body: content, bodyOffset: 1, options: Object.entries({ ...optionsIn }) };
}

function directiveArgToTokens(arg: string, lineNumber: number, state: StateCore) {
Expand Down
5 changes: 4 additions & 1 deletion packages/markdown-it-myst/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ import type MarkdownIt from 'markdown-it/lib';
import { rolePlugin } from './roles.js';
import { directivePlugin } from './directives.js';
import { citationsPlugin } from './citations.js';
import { labelsPlugin } from './labels.js';
import { shortcodePlugin } from './shortcode.js';
import { spanPlugin } from './span.js';

export { rolePlugin, directivePlugin, citationsPlugin };
export { rolePlugin, directivePlugin, citationsPlugin, shortcodePlugin, spanPlugin, labelsPlugin };

/**
* A markdown-it plugin for parsing MyST roles and directives to structured data
Expand Down
29 changes: 29 additions & 0 deletions packages/markdown-it-myst/src/labels.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import MarkdownIt from 'markdown-it/lib';

Check failure on line 1 in packages/markdown-it-myst/src/labels.ts

View workflow job for this annotation

GitHub Actions / lint

All imports in the declaration are only used as types. Use `import type`
import StateInline from 'markdown-it/lib/rules_inline/state_inline.js';

Check failure on line 2 in packages/markdown-it-myst/src/labels.ts

View workflow job for this annotation

GitHub Actions / lint

All imports in the declaration are only used as types. Use `import type`

// Matches a `{#label}` target anchored at the very start of the input string;
// capture group 1 is the label text (lazy, so it stops at the first `}`).
// No `m` flag on purpose: this pattern is run against the remainder of the inline
// source, which may span several lines, and with `m` the `^` would also match
// after a later newline, reporting a label that does not start at the current position.
// The `i` flag was dropped as well: the pattern contains no cased characters.
const LABEL_PATTERN = /^\{#(.+?)\}/;

Check failure on line 4 in packages/markdown-it-myst/src/labels.ts

View workflow job for this annotation

GitHub Actions / lint

Unnecessary escape character: \#

// Token type pushed for every parsed `{#label}`; the inline rule is registered
// under the name `parse_` + this value.
const LABEL_TOKEN_NAME = 'myst_target';

/**
 * Inline rule that recognizes a `{#label}` target at the current parser position.
 *
 * @param state - Inline parser state; on a match `state.pos` is moved past the label.
 * @param silent - When true the match is only validated and no token is pushed.
 * @returns `true` when a label was consumed, `false` otherwise.
 */
function labelRule(state: StateInline, silent: boolean): boolean {
  const start = state.pos;
  // A backslash directly before the current position means the label is escaped
  // TODO: this could be improved in the case of edge case '\\{', also multi-line
  const precededByBackslash = state.src.charCodeAt(start - 1) === 0x5c; /* \ */
  if (precededByBackslash) return false;

  const found = LABEL_PATTERN.exec(state.src.slice(start));
  if (found == null) return false;

  const matchedLength = found[0].length;
  if (!silent) {
    const token = state.push(LABEL_TOKEN_NAME, '', 0);
    token.content = found[1];
    // Record the [start, end) column span of the label in the inline source
    (token as any).col = [state.pos, state.pos + matchedLength];
  }
  state.pos += matchedLength;
  return true;
}

/**
 * markdown-it plugin that registers the `{#label}` inline rule ahead of the
 * built-in `backticks` rule.
 */
export function labelsPlugin(md: MarkdownIt): void {
  const ruleName = `parse_${LABEL_TOKEN_NAME}`;
  md.inline.ruler.before('backticks', ruleName, labelRule);
}
3 changes: 2 additions & 1 deletion packages/markdown-it-myst/src/roles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,14 @@ function runRoles(state: StateCore): boolean {
if (child.type === 'role') {
try {
const { map } = token;
const { content, col } = child as any;
const { content, col, meta } = child as any;
const roleOpen = new state.Token('parsed_role_open', '', 1);
roleOpen.content = content;
roleOpen.hidden = true;
roleOpen.info = child.meta.name;
roleOpen.block = false;
roleOpen.map = map;
roleOpen.meta = meta;
(roleOpen as any).col = col;
const contentTokens = roleContentToTokens(content, map ? map[0] : 0, state);
const roleClose = new state.Token('parsed_role_close', '', -1);
Expand Down
31 changes: 31 additions & 0 deletions packages/markdown-it-myst/src/shortcode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import type MarkdownIt from 'markdown-it/lib';
import type StateCore from 'markdown-it/lib/rules_core/state_core.js';

Check warning on line 2 in packages/markdown-it-myst/src/shortcode.ts

View workflow job for this annotation

GitHub Actions / lint

'StateCore' is defined but never used
import type StateInline from 'markdown-it/lib/rules_inline/state_inline.js';
import { nestedPartToTokens } from './nestedParse.js';

Check warning on line 4 in packages/markdown-it-myst/src/shortcode.ts

View workflow job for this annotation

GitHub Actions / lint

'nestedPartToTokens' is defined but never used

/**
 * markdown-it plugin that registers the shortcode inline rule ahead of the
 * built-in `backticks` rule.
 */
export function shortcodePlugin(md: MarkdownIt): void {
  const inlineRuler = md.inline.ruler;
  inlineRuler.before('backticks', 'parse_short_codes', shortCodeRule);
}

// Hugo short code syntax e.g. {{< role value >}}
// Group 1 is the shortcode name (1-36 characters from a-z, 0-9, `_`, `-`, `+`, `:`).
// Group 2 is everything after the name up to the first `>`; it may keep trailing
// whitespace because the greedy `[^>]*` consumes it before the final `\s*`.
// `<` and `>` need no escaping in a regular expression (no-useless-escape).
const ROLE_PATTERN = /^\{\{<\s*([a-z0-9_\-+:]{1,36})\s*([^>]*)\s*>\}\}/;

Check failure on line 11 in packages/markdown-it-myst/src/shortcode.ts

View workflow job for this annotation

GitHub Actions / lint

Unnecessary escape character: \<

Check failure on line 11 in packages/markdown-it-myst/src/shortcode.ts

View workflow job for this annotation

GitHub Actions / lint

Unnecessary escape character: \>

/**
 * Inline rule that turns a Hugo-style shortcode (`{{< name value >}}`) at the
 * current position into a `role` token.
 *
 * @param state - Inline parser state; on a match `state.pos` is moved past the shortcode.
 * @param silent - When true the match is only validated and no token is pushed.
 * @returns `true` when a shortcode was consumed, `false` otherwise.
 */
function shortCodeRule(state: StateInline, silent: boolean): boolean {
  const start = state.pos;
  // A backslash directly before the current position means the role is escaped
  // TODO: this could be improved in the case of edge case '\\{', also multi-line
  const precededByBackslash = state.src.charCodeAt(start - 1) === 0x5c; /* \ */
  if (precededByBackslash) return false;

  const found = ROLE_PATTERN.exec(state.src.slice(start));
  if (found == null) return false;

  const [matched, name, content] = found;
  if (!silent) {
    const token = state.push('role', '', 0);
    // The shortcode name becomes the role name; the remainder is the role content
    token.meta = { name };
    token.content = content?.trim();
    (token as any).col = [state.pos, state.pos + matched.length];
  }
  state.pos += matched.length;
  return true;
}
35 changes: 35 additions & 0 deletions packages/markdown-it-myst/src/span.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import type MarkdownIt from 'markdown-it/lib';
import type StateCore from 'markdown-it/lib/rules_core/state_core.js';

Check warning on line 2 in packages/markdown-it-myst/src/span.ts

View workflow job for this annotation

GitHub Actions / lint

'StateCore' is defined but never used
import type StateInline from 'markdown-it/lib/rules_inline/state_inline.js';
import { nestedPartToTokens } from './nestedParse.js';

Check warning on line 4 in packages/markdown-it-myst/src/span.ts

View workflow job for this annotation

GitHub Actions / lint

'nestedPartToTokens' is defined but never used

/**
 * markdown-it plugin that registers the inline span rule ahead of the
 * built-in `backticks` rule.
 */
export function spanPlugin(md: MarkdownIt): void {
  const inlineRuler = md.inline.ruler;
  inlineRuler.before('backticks', 'parse_span', spanRule);
}

// Inline span syntax e.g. [markdown]{.class}
// Group 1 is the bracketed content (no `]` allowed inside); group 2 is the raw
// text between the braces (no `}` allowed inside).
// `}` needs no escaping inside a character class (no-useless-escape).
const ROLE_PATTERN = /^\[([^\]]*)\]\{([^}]*)\}/;

Check failure on line 11 in packages/markdown-it-myst/src/span.ts

View workflow job for this annotation

GitHub Actions / lint

Unnecessary escape character: \}

/**
 * Inline rule that turns `[content]{.class ...}` at the current position into a
 * `role` token named `span`, carrying the classes as a space-joined `class` option.
 *
 * @param state - Inline parser state; on a match `state.pos` is moved past the span.
 * @param silent - When true the match is only validated and no token is pushed.
 * @returns `true` when a span was consumed, `false` otherwise.
 */
function spanRule(state: StateInline, silent: boolean): boolean {
  const start = state.pos;
  // A backslash directly before the current position means the role is escaped
  // TODO: this could be improved in the case of edge case '\\[', also multi-line
  const precededByBackslash = state.src.charCodeAt(start - 1) === 0x5c; /* \ */
  if (precededByBackslash) return false;

  const found = ROLE_PATTERN.exec(state.src.slice(start));
  if (found == null) return false;

  const [matched, content, options] = found;
  if (!silent) {
    const token = state.push('role', '', 0);
    // Split on single spaces, strip one leading '.' from each piece, drop empties
    const classNames: string[] = [];
    for (const piece of options.split(' ')) {
      const cleaned = piece.trim().replace(/^\./, '');
      if (cleaned) classNames.push(cleaned);
    }
    token.meta = { name: 'span', options: { class: classNames.join(' ') } };
    token.content = content?.trim();
    (token as any).col = [state.pos, state.pos + matched.length];
  }
  state.pos += matched.length;
  return true;
}
6 changes: 3 additions & 3 deletions packages/markdown-it-myst/tests/cases.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, test, vi } from 'vitest';
import type Token from 'markdown-it/lib/token';
import { citationsPlugin } from '../src';
import { citationsPlugin, labelsPlugin } from '../src';
import fs from 'node:fs';
import path from 'node:path';
import yaml from 'js-yaml';
Expand All @@ -17,7 +17,7 @@ type TestCase = {
};

const directory = path.join('tests');
const files = ['citations.yml'];
const files = ['citations.yml', 'labels.yml'];

const only = ''; // Can set this to a test title

Expand All @@ -36,7 +36,7 @@ casesList.forEach(({ title, cases }) => {
test.each(casesToUse.map((c): [string, TestCase] => [c.title, c]))(
'%s',
(_, { md, tokens }) => {
const mdit = MarkdownIt().use(citationsPlugin);
const mdit = MarkdownIt().use(citationsPlugin).use(labelsPlugin);
const parsed = mdit.parse(md, {});
expect(parsed).containSubset(tokens);
},
Expand Down
22 changes: 22 additions & 0 deletions packages/markdown-it-myst/tests/labels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
title: Labels
cases:
- title: basic {#label}
md: '{#my-label}'
tokens:
- type: paragraph_open
- type: inline
children:
- type: myst_target
content: 'my-label'
- type: paragraph_close
- title: label stops at first }
md: '{#my-label}too}'
tokens:
- type: paragraph_open
- type: inline
children:
- type: myst_target
content: 'my-label'
- type: text
content: 'too}'
- type: paragraph_close
55 changes: 55 additions & 0 deletions packages/markdown-it-myst/tests/shortcode.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { describe, expect, it } from 'vitest';
import MarkdownIt from 'markdown-it';
import { default as plugin, shortcodePlugin } from '../src';

// Shortcodes (`{{< name value >}}`) are pushed as `role` tokens by shortcodePlugin and
// then expanded by the default plugin into parsed_role_open / role_body / parsed_role_close.
describe('parses shortcodes', () => {
  it('basic shortcode parses', () => {
    const mdit = MarkdownIt().use(shortcodePlugin).use(plugin);
    const tokens = mdit.parse('ok {{< var lang >}}', {});
    expect(tokens.map((t) => t.type)).toEqual(['paragraph_open', 'inline', 'paragraph_close']);
    expect(tokens[1].children?.map((t) => t.type)).toEqual([
      'text',
      'parsed_role_open',
      'role_body_open',
      'inline',
      'role_body_close',
      'parsed_role_close',
    ]);
    expect(tokens[1].content).toEqual('ok {{< var lang >}}');
    // Pass the column information for the role
    expect((tokens[1].children?.[1] as any).col).toEqual([3, 19]);
    expect(tokens[1].children?.[1].info).toEqual('var');
    expect(tokens[1].children?.[1].content).toEqual('lang');
    expect(tokens[1].children?.[3].content).toEqual('lang');
  });
  // Previously shared the title 'basic role parses' with the test above, which made
  // failures ambiguous in the test report.
  it('shortcode parses inside a multi-line paragraph', () => {
    const mdit = MarkdownIt().use(shortcodePlugin).use(plugin);
    const content = `Notice that the value for \`some_numbers\` is {{< var np_or_r >}},
and that this value *contains* 10 numbers.`;
    const tokens = mdit.parse(content, {});
    expect(tokens.map((t) => t.type)).toEqual(['paragraph_open', 'inline', 'paragraph_close']);
    expect(tokens[1].children?.map((t) => t.type)).toEqual([
      'text',
      'code_inline',
      'text',
      'parsed_role_open',
      'role_body_open',
      'inline',
      'role_body_close',
      'parsed_role_close',
      'text',
      'softbreak',
      'text',
      'em_open',
      'text',
      'em_close',
      'text',
    ]);
    expect(tokens[1].content).toEqual(content);
    // Pass the column information for the role
    expect((tokens[1].children?.[3] as any).col).toEqual([44, 63]);
    expect(tokens[1].children?.[3].info).toEqual('var');
    expect(tokens[1].children?.[3].content).toEqual('np_or_r');
    // The nested inline token sits two children after parsed_role_open (index 5 here);
    // the original asserted index 3 twice, leaving the role body unchecked.
    expect(tokens[1].children?.[5].content).toEqual('np_or_r');
  });
});
26 changes: 26 additions & 0 deletions packages/markdown-it-myst/tests/span.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { describe, expect, it } from 'vitest';
import MarkdownIt from 'markdown-it';
import { default as plugin, spanPlugin } from '../src';

// `[content]{.class}` is pushed as a `span` role by spanPlugin and then expanded by the
// default plugin into parsed_role_open / role_body / parsed_role_close tokens.
describe('parses spans', () => {
  it('basic span parses', () => {
    const source = 'ok [content]{.python}';
    const parser = MarkdownIt().use(spanPlugin).use(plugin);
    const tokens = parser.parse(source, {});

    const blockTypes = tokens.map((t) => t.type);
    expect(blockTypes).toEqual(['paragraph_open', 'inline', 'paragraph_close']);

    const children = tokens[1].children;
    const childTypes = children?.map((t) => t.type);
    expect(childTypes).toEqual([
      'text',
      'parsed_role_open',
      'role_body_open',
      'inline',
      'role_body_close',
      'parsed_role_close',
    ]);
    expect(tokens[1].content).toEqual(source);
    // Pass the column information for the role
    expect((children?.[1] as any).col).toEqual([3, 21]);
    expect(children?.[1].info).toEqual('span');
    expect(children?.[1].meta.options.class).toEqual('python');
    expect(children?.[1].content).toEqual('content');
    expect(children?.[3].content).toEqual('content');
  });
});
9 changes: 6 additions & 3 deletions packages/myst-cli/src/process/mdast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,18 @@ export async function transformMdast(
})
.use(inlineMathSimplificationPlugin)
.use(mathPlugin, { macros: frontmatter.math })
.use(glossaryPlugin) // This should be before the enumerate plugins
.use(abbreviationPlugin, { abbreviations: frontmatter.abbreviations })
.use(enumerateTargetsPlugin, { state }) // This should be after math/container transforms
.use(joinGatesPlugin);
// Load custom transform plugins
session.plugins?.transforms.forEach((t) => {
if (t.stage !== 'document') return;
pipe.use(t.plugin, undefined, pluginUtils);
});

pipe
.use(glossaryPlugin) // This should be before the enumerate plugins
.use(abbreviationPlugin, { abbreviations: frontmatter.abbreviations })
.use(enumerateTargetsPlugin, { state }); // This should be after math/container transforms

await pipe.run(mdast, vfile);

// This needs to come after basic transformations since meta tags are added there
Expand Down
Loading
Loading