From 741b87767acd30a3f399d3e5e4cce8d81703ec85 Mon Sep 17 00:00:00 2001
From: qwinsi <70425035+qwinsi@users.noreply.github.com>
Date: Sun, 25 Aug 2024 19:17:59 +0800
Subject: [PATCH] ignore whitespace before or after _ or ^
---
README.md | 6 ++---
package.json | 2 +-
src/parser.ts | 59 ++++++++++++++++++++++++++++-------------------
test/main.test.ts | 5 +++-
test/math.yml | 5 +++-
5 files changed, 47 insertions(+), 30 deletions(-)
diff --git a/README.md b/README.md
index 1d7e5cf..0ae0a6b 100644
--- a/README.md
+++ b/README.md
@@ -16,12 +16,12 @@ npm install tex2typst
## Or just loading it in a web page
```html
-
+
-
+
```
-Replace `0.1.20` with the latest version number in case this README is outdated.
+Replace `0.2.7` with the latest version number in case this README is outdated.
The size of minimized library `tex2typst.min.js` is about 23 KB.
diff --git a/package.json b/package.json
index 9dc074a..f774d83 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "tex2typst",
- "version": "0.2.6",
+ "version": "0.2.7",
"description": "JavaScript library for converting TeX code to Typst",
"type": "module",
"main": "dist/index.js",
diff --git a/src/parser.ts b/src/parser.ts
index 71e4edf..4953556 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -226,12 +226,12 @@ function find_closing_curly_bracket_char(latex: string, start: number): number {
}
-interface Token {
+export interface Token {
type: 'element' | 'command' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'unknown';
value: string;
}
-function tokenize(latex: string): Token[] {
+export function tokenize(latex: string): Token[] {
const tokens: Token[] = [];
let pos = 0;
@@ -492,22 +492,10 @@ export class LatexParser {
case '\\,':
return [{ type: 'control', content: '\\,' }, start + 1];
case '_': {
- let [sub, pos] = this.parseNextExpr(tokens, start + 1);
- let sup: TexNode | undefined = undefined;
- if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
- [sup, pos] = this.parseNextExpr(tokens, pos + 1);
- }
- const subData = { base: EMPTY_NODE, sub, sup };
- return [{ type: 'supsub', content: '', data: subData }, pos];
+ return [ EMPTY_NODE, start];
}
case '^': {
- let [sup, pos] = this.parseNextExpr(tokens, start + 1);
- let sub: TexNode | undefined = undefined;
- if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
- [sub, pos] = this.parseNextExpr(tokens, pos + 1);
- }
- const supData = { base: EMPTY_NODE, sub, sup };
- return [{ type: 'supsub', content: '', data: supData }, pos];
+ return [ EMPTY_NODE, start];
}
case '&':
return [{ type: 'control', content: '&' }, start + 1];
@@ -677,17 +665,40 @@ export class LatexParser {
}
}
-export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
- const parser = new LatexParser();
- const original_tokens = tokenize(tex);
- let processed_tokens: Token[] = [];
- for (const token of original_tokens) {
+// Drop each whitespace token that directly precedes or directly follows a `_` or `^` token.
+function passIgnoreWhitespaceBeforeScriptMark(tokens: Token[]): Token[] {
+    // An out-of-range neighbour is `undefined` and never counts as a script mark.
+    const isScriptMark = (candidate: Token | undefined): boolean =>
+        candidate !== undefined && (token_eq(candidate, SUB_SYMBOL) || token_eq(candidate, SUP_SYMBOL));
+    return tokens.filter((tok, idx) => {
+        if (tok.type !== 'whitespace') {
+            return true;
+        }
+        // Neighbours are read from the unfiltered input, so earlier removals do not matter.
+        const next = tokens[idx + 1];
+        const prev = tokens[idx - 1];
+        return !(isScriptMark(next) || isScriptMark(prev));
+    });
+}
+
+// Replace each command token that has a non-empty entry in customTexMacros with the tokens of that entry's TeX source (single pass; expanded tokens are not re-expanded).
+function passExpandCustomTexMacros(tokens: Token[], customTexMacros: {[key: string]: string}): Token[] {
+ let out_tokens: Token[] = [];
+ for (const token of tokens) {
if (token.type === 'command' && customTexMacros[token.value]) {
const expanded_tokens = tokenize(customTexMacros[token.value]);
- processed_tokens = processed_tokens.concat(expanded_tokens);
+ out_tokens = out_tokens.concat(expanded_tokens); // splice the macro body's tokens in place of the command token
} else {
- processed_tokens.push(token);
+ out_tokens.push(token); // not a known macro: keep the token unchanged
}
}
- return parser.parse(processed_tokens);
+ return out_tokens;
+}
+
+/** Parse TeX source into a TexNode; customTexMacros maps a command token's value to the TeX source that replaces it. */
+export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
+    // Expand macros BEFORE stripping whitespace, so whitespace next to `_`/`^` that comes from a macro body is removed too.
+    const expanded = passExpandCustomTexMacros(tokenize(tex), customTexMacros);
+    const tokens = passIgnoreWhitespaceBeforeScriptMark(expanded);
+    return new LatexParser().parse(tokens);
+}
diff --git a/test/main.test.ts b/test/main.test.ts
index acd9b7c..b84c6d6 100644
--- a/test/main.test.ts
+++ b/test/main.test.ts
@@ -2,7 +2,7 @@ import { describe, it, test, expect } from 'vitest';
import yaml from 'js-yaml';
import path from 'node:path';
import fs from 'node:fs';
-import { parseTex, LatexParserError } from '../src/parser';
+import { parseTex, LatexParserError, Token, tokenize } from '../src/parser';
import { tex2typst } from '../src/index';
import { TypstWriterError } from '../src/writer';
import { Tex2TypstOptions, TexNode } from '../src/types';
@@ -34,6 +34,7 @@ caseFiles.forEach(({ title, cases }) => {
cases.forEach((c: TestCase) => {
test(c.title, function() {
const {tex, typst} = c;
+ let tokens: null | Token[] = null;
let tex_node: null | TexNode = null;
let result: null | string = null;
try {
@@ -42,11 +43,13 @@ caseFiles.forEach(({ title, cases }) => {
preferTypstIntrinsic: c.preferTypstIntrinsic? c.preferTypstIntrinsic: false,
customTexMacros: c.customTexMacros? c.customTexMacros: {},
};
+ tokens = tokenize(tex);
tex_node = parseTex(tex, settings.customTexMacros!);
result = tex2typst(tex, settings);
if (result !== typst) {
console.log(`====== 😠Wrong ======`);
console.log(tex);
+ console.log(tokens);
console.log(yaml.dump(tex_node));
}
expect(result).toBe(typst);
diff --git a/test/math.yml b/test/math.yml
index c070fa1..3fe89db 100644
--- a/test/math.yml
+++ b/test/math.yml
@@ -299,4 +299,7 @@ cases:
typst: a^(p - 1) equiv 1 mod p
- title: thin space
tex: a \, b
- typst: a thin b
\ No newline at end of file
+ typst: a thin b
+ - title: space before or after script
+ tex: \lim _{x \to 0} \sum_{i=1} ^ n
+ typst: lim_(x arrow.r 0) sum_(i = 1)^n
\ No newline at end of file