From 741b87767acd30a3f399d3e5e4cce8d81703ec85 Mon Sep 17 00:00:00 2001 From: qwinsi <70425035+qwinsi@users.noreply.github.com> Date: Sun, 25 Aug 2024 19:17:59 +0800 Subject: [PATCH] ignore whitespace before or after _ or ^ --- README.md | 6 ++--- package.json | 2 +- src/parser.ts | 59 ++++++++++++++++++++++++++++------------------- test/main.test.ts | 5 +++- test/math.yml | 5 +++- 5 files changed, 47 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 1d7e5cf..0ae0a6b 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,12 @@ npm install tex2typst ## Or just loading it in a web page ```html - + - + ``` -Replace `0.1.20` with the latest version number in case this README is outdated. +Replace `0.2.7` with the latest version number in case this README is outdated. The size of minimized library `tex2typst.min.js` is about 23 KB. diff --git a/package.json b/package.json index 9dc074a..f774d83 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "tex2typst", - "version": "0.2.6", + "version": "0.2.7", "description": "JavaScript library for converting TeX code to Typst", "type": "module", "main": "dist/index.js", diff --git a/src/parser.ts b/src/parser.ts index 71e4edf..4953556 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -226,12 +226,12 @@ function find_closing_curly_bracket_char(latex: string, start: number): number { } -interface Token { +export interface Token { type: 'element' | 'command' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'unknown'; value: string; } -function tokenize(latex: string): Token[] { +export function tokenize(latex: string): Token[] { const tokens: Token[] = []; let pos = 0; @@ -492,22 +492,10 @@ export class LatexParser { case '\\,': return [{ type: 'control', content: '\\,' }, start + 1]; case '_': { - let [sub, pos] = this.parseNextExpr(tokens, start + 1); - let sup: TexNode | undefined = undefined; - if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) { - [sup, pos] = this.parseNextExpr(tokens, pos + 1); - } - const subData = { base: EMPTY_NODE, sub, sup }; - return [{ type: 'supsub', content: '', data: subData }, pos]; + return [ EMPTY_NODE, start]; } case '^': { - let [sup, pos] = this.parseNextExpr(tokens, start + 1); - let sub: TexNode | undefined = undefined; - if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) { - [sub, pos] = this.parseNextExpr(tokens, pos + 1); - } - const supData = { base: EMPTY_NODE, sub, sup }; - return [{ type: 'supsub', content: '', data: supData }, pos]; + return [ EMPTY_NODE, start]; } case '&': return [{ type: 'control', content: '&' }, start + 1]; @@ -677,17 +665,40 @@ export class LatexParser { } } -export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode { - const parser = new LatexParser(); - const original_tokens = tokenize(tex); - let processed_tokens: Token[] = []; - for (const token of original_tokens) { +// Remove all whitespace before or after _ or ^ +function passIgnoreWhitespaceBeforeScriptMark(tokens: Token[]): Token[] { + const is_script_mark = (token: Token) => token_eq(token, SUB_SYMBOL) || token_eq(token, SUP_SYMBOL); + let out_tokens: Token[] = []; + for (let i = 0; i < tokens.length; i++) { + if (tokens[i].type === 'whitespace' && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) { + continue; + } + if (tokens[i].type === 'whitespace' && i - 1 >= 0 && is_script_mark(tokens[i - 1])) { + continue; + } + out_tokens.push(tokens[i]); + } + return out_tokens; +} + +// expand custom tex macros +function passExpandCustomTexMacros(tokens: Token[], customTexMacros: {[key: string]: string}): Token[] { + let out_tokens: Token[] = []; + for (const token of tokens) { if (token.type === 'command' && customTexMacros[token.value]) { const expanded_tokens = tokenize(customTexMacros[token.value]); - processed_tokens = processed_tokens.concat(expanded_tokens); + out_tokens = out_tokens.concat(expanded_tokens); } else { - processed_tokens.push(token); + out_tokens.push(token); } } - return parser.parse(processed_tokens); + return out_tokens; +} + +export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode { + const parser = new LatexParser(); + let tokens = tokenize(tex); + tokens = passIgnoreWhitespaceBeforeScriptMark(tokens); + tokens = passExpandCustomTexMacros(tokens, customTexMacros); + return parser.parse(tokens); } diff --git a/test/main.test.ts b/test/main.test.ts index acd9b7c..b84c6d6 100644 --- a/test/main.test.ts +++ b/test/main.test.ts @@ -2,7 +2,7 @@ import { describe, it, test, expect } from 'vitest'; import yaml from 'js-yaml'; import path from 'node:path'; import fs from 'node:fs'; -import { parseTex, LatexParserError } from '../src/parser'; +import { parseTex, LatexParserError, Token, tokenize } from '../src/parser'; import { tex2typst } from '../src/index'; import { TypstWriterError } from '../src/writer'; import { Tex2TypstOptions, TexNode } from '../src/types'; @@ -34,6 +34,7 @@ caseFiles.forEach(({ title, cases }) => { cases.forEach((c: TestCase) => { test(c.title, function() { const {tex, typst} = c; + let tokens: null | Token[] = null; let tex_node: null | TexNode = null; let result: null | string = null; try { @@ -42,11 +43,13 @@ caseFiles.forEach(({ title, cases }) => { preferTypstIntrinsic: c.preferTypstIntrinsic? c.preferTypstIntrinsic: false, customTexMacros: c.customTexMacros? c.customTexMacros: {}, }; + tokens = tokenize(tex); tex_node = parseTex(tex, settings.customTexMacros!); result = tex2typst(tex, settings); if (result !== typst) { console.log(`====== 😭 Wrong ======`); console.log(tex); + console.log(tokens); console.log(yaml.dump(tex_node)); } expect(result).toBe(typst); diff --git a/test/math.yml b/test/math.yml index c070fa1..3fe89db 100644 --- a/test/math.yml +++ b/test/math.yml @@ -299,4 +299,7 @@ cases: typst: a^(p - 1) equiv 1 mod p - title: thin space tex: a \, b - typst: a thin b \ No newline at end of file + typst: a thin b + - title: space before or after script + tex: \lim _{x \to 0} \sum_{i=1} ^ n + typst: lim_(x arrow.r 0) sum_(i = 1)^n \ No newline at end of file