From 21d0e72ed614f56bc023bdbc7341ea2c0cb98520 Mon Sep 17 00:00:00 2001 From: shfshanyue Date: Thu, 19 Mar 2026 23:25:09 +0800 Subject: [PATCH] Add addDefuddleRules function and update exports - Introduced addDefuddleRules to register custom rules for TurndownService. - Updated createMarkdownContent to utilize addDefuddleRules. - Exported addDefuddleRules from index.full.ts for external use. - Added tests for addDefuddleRules to ensure proper functionality. --- src/index.full.ts | 5 +++-- src/markdown.ts | 33 ++++++++++++++------------------- tests/markdown.test.ts | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/index.full.ts b/src/index.full.ts index 3d18e8b08..8d176b22b 100644 --- a/src/index.full.ts +++ b/src/index.full.ts @@ -1,12 +1,12 @@ import { Defuddle as DefuddleBase } from './defuddle'; import { DefuddleOptions, DefuddleResponse } from './types'; -import { toMarkdown, createMarkdownContent } from './markdown'; +import { toMarkdown, createMarkdownContent, addDefuddleRules } from './markdown'; // Export types export type { DefuddleOptions, DefuddleResponse }; // Export standalone markdown conversion -export { createMarkdownContent }; +export { createMarkdownContent, addDefuddleRules }; class Defuddle { private defuddle: DefuddleBase; @@ -32,6 +32,7 @@ class Defuddle { // Attach named exports as static properties for UMD/CJS consumers (Defuddle as any).createMarkdownContent = createMarkdownContent; +(Defuddle as any).addDefuddleRules = addDefuddleRules; // Export Defuddle as default export default Defuddle; diff --git a/src/markdown.ts b/src/markdown.ts index 2f28a0bf7..30f015016 100644 --- a/src/markdown.ts +++ b/src/markdown.ts @@ -76,17 +76,7 @@ function getBestImageSrc(node: GenericElement): string { return node.getAttribute('src') || ''; } -export function createMarkdownContent(content: string, url: string) { - const footnotes: { [key: string]: string } = {}; - const turndownService = new TurndownService({ - headingStyle: 'atx', - hr: '---', - bulletListMarker: '-', - codeBlockStyle: 'fenced', - emDelimiter: '*', - preformattedCode: true, - }); - +export function addDefuddleRules(turndownService: TurndownService): void { turndownService.addRule('table', { filter: 'table', replacement: function(content, node) { @@ -726,6 +716,19 @@ export function createMarkdownContent(content: string, url: string) { } return ''; } +} + +export function createMarkdownContent(content: string, url: string) { + const turndownService = new TurndownService({ + headingStyle: 'atx', + hr: '---', + bulletListMarker: '-', + codeBlockStyle: 'fenced', + emDelimiter: '*', + preformattedCode: true, + }); + + addDefuddleRules(turndownService); try { // Strip tags — word break opportunity hints that are invisible in @@ -752,14 +755,6 @@ export function createMarkdownContent(content: string, url: string) { // Remove any consecutive newlines more than two markdown = markdown.replace(/\n{3,}/g, '\n\n'); - - // Append footnotes at the end of the document - if (Object.keys(footnotes).length > 0) { - markdown += '\n\n---\n\n'; - for (const [id, content] of Object.entries(footnotes)) { - markdown += `[^${id}]: ${content}\n\n`; - } - } return markdown.trim(); } catch (error) { diff --git a/tests/markdown.test.ts b/tests/markdown.test.ts index ff2879e50..e60fce8e6 100644 --- a/tests/markdown.test.ts +++ b/tests/markdown.test.ts @@ -1,6 +1,8 @@ import { describe, test, expect } from 'vitest'; +import TurndownService from 'turndown'; import { Defuddle } from '../src/node'; import { parseDocument } from './helpers'; +import { addDefuddleRules } from '../src/markdown'; describe('Markdown conversion', () => { describe('exclamation mark before image', () => { @@ -68,4 +70,38 @@ describe('Markdown conversion', () => { expect(result.contentMarkdown).toContain('longword'); }); }); + + describe('addDefuddleRules', () => { + test('should register rules on a custom TurndownService instance', () => { + const td = new TurndownService({ headingStyle: 'setext', bulletListMarker: '*' }); + addDefuddleRules(td); + + const html = '

Title

'; + const md = td.turndown(html); + + // setext heading style (user option respected) + expect(md).toContain('Title\n-----'); + // bullet marker from user option + expect(md).toContain('* one'); + }); + + test('should apply table rule from defuddle', () => { + const td = new TurndownService(); + addDefuddleRules(td); + + const html = '
AB
12
'; + const md = td.turndown(html); + + expect(md).toContain('| A | B |'); + expect(md).toContain('| 1 | 2 |'); + }); + + test('should apply highlight rule from defuddle', () => { + const td = new TurndownService(); + addDefuddleRules(td); + + const md = td.turndown('

This is highlighted text

'); + expect(md).toContain('==highlighted=='); + }); + }); });