ruby8008/export_xiumi.py at main · Jackychan1989/ruby8008 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python3
"""
导出为秀米/135编辑器兼容格式
"""

import argparse
import os
import re
import sys
from pathlib import Path

from bs4 import BeautifulSoup


# Matches one ``text-indent`` declaration inside an inline style, whatever its
# value or spacing, together with its optional trailing semicolon.
_TEXT_INDENT_RE = re.compile(r'(?<![\w-])text-indent\s*:[^;]*;?', re.IGNORECASE)

# ``src`` prefixes that point at something other than a file on local disk.
_NON_LOCAL_SRC_PREFIXES = ('http://', 'https://', '//', 'data:')


def _strip_text_indent(style: str) -> str:
    """Return *style* with every ``text-indent`` declaration removed."""
    return _TEXT_INDENT_RE.sub('', style).strip()


def _image_list_path(output_file: str) -> str:
    """Return the path of the image checklist written next to *output_file*."""
    # Swap only the real extension. A plain ``str.replace('.html', ...)`` would
    # return the export path itself when it has no ``.html`` in it (so the
    # checklist would overwrite the export) and would also rewrite ``.html``
    # occurring in a directory name.
    return f"{os.path.splitext(output_file)[0]}_images.txt"


def html_to_xiumi_format(html_file: str, output_file: str = None) -> str:
    """
    Convert an HTML article into a page suitable for pasting into the
    Xiumi / 135editor editors.

    The ``<div class="content">`` element of *html_file* is extracted, its
    first ``<h1>`` is dropped (the title is handled separately), first-line
    indentation is removed from paragraph styles, and every local image is
    tagged with a ``data-local-path`` attribute and a generic ``alt`` text.
    The result is wrapped in a standalone HTML page and written to disk.
    When at least one tagged image exists on disk, a text file listing those
    images (one path per line) is written next to the export.

    Args:
        html_file: Path of the HTML file to convert.
        output_file: Destination path. When falsy, ``<html_file stem>_xiumi.html``
            is used.

    Returns:
        The path of the exported HTML file.

    Raises:
        SystemExit: With code 1 when no ``<div class="content">`` is found.
    """
    print(f"📄 读取 HTML: {html_file}")

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract the article body.
    content_div = soup.find('div', class_='content')
    if not content_div:
        print("❌ 未找到内容区域")
        sys.exit(1)

    # Drop the h1; the title is handled separately.
    h1 = content_div.find('h1')
    if h1:
        h1.decompose()

    # Remove first-line indentation from paragraphs (not wanted on WeChat).
    for p in content_div.find_all('p'):
        style = p.get('style')
        if not style:
            continue
        cleaned = _strip_text_indent(style)
        if cleaned:
            p['style'] = cleaned
        else:
            # Nothing but text-indent was set: drop the now-empty attribute.
            del p['style']

    # Record where each local image lives on disk. Relative sources are
    # resolved against the parent of the directory containing the HTML file.
    html_dir = os.path.dirname(os.path.abspath(html_file))
    for img in content_div.find_all('img'):
        src = img.get('src')
        if not src:
            continue
        if src.lstrip().lower().startswith(_NON_LOCAL_SRC_PREFIXES):
            # Remote, protocol-relative and inline (data:) images need no upload.
            continue
        img['data-local-path'] = os.path.join(html_dir, '..', src)
        img['alt'] = '配图'

    # Build the standalone page around the processed content.
    body_html = str(content_div)
    xiumi_html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>秀米格式导出</title>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, "PingFang SC", "Microsoft YaHei", sans-serif;
            line-height: 1.8;
            color: #333;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }}
        h2 {{
            font-size: 20px;
            font-weight: bold;
            margin: 30px 0 15px 0;
            border-left: 4px solid #4CAF50;
            padding-left: 10px;
        }}
        h3 {{
            font-size: 18px;
            font-weight: bold;
            margin: 25px 0 12px 0;
            color: #2c3e50;
        }}
        p {{
            margin: 15px 0;
            line-height: 1.8;
        }}
        img {{
            max-width: 100%;
            display: block;
            margin: 20px auto;
        }}
        ul, ol {{
            margin: 15px 0;
            padding-left: 30px;
        }}
        li {{
            margin: 8px 0;
        }}
        blockquote {{
            border-left: 4px solid #ddd;
            padding-left: 15px;
            margin: 20px 0;
            color: #666;
            font-style: italic;
        }}
    </style>
</head>
<body>
{body_html}

<p style="text-align: center; color: #999; font-size: 14px; margin-top: 50px;">
    ———— END ————
</p>

<p style="text-align: center; color: #999; font-size: 12px;">
    本文由 AI 辅助创作
</p>
</body>
</html>
"""

    # Default the destination to a sibling of the input file.
    if not output_file:
        base_name = os.path.splitext(html_file)[0]
        output_file = f"{base_name}_xiumi.html"

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(xiumi_html)

    print(f"✅ 秀米格式已导出: {output_file}")
    print("\n📝 使用方法:")
    print(f"1. 在浏览器打开: {output_file}")
    print("2. 全选复制 (Cmd+A, Cmd+C)")
    print("3. 访问 https://xiumi.us/ 或 https://www.135editor.com/")
    print("4. 粘贴内容")
    print("5. 点击「一键排版」")
    print("6. 从编辑器同步到微信公众号")

    # Build the upload checklist: tagged images that actually exist on disk,
    # de-duplicated in document order so a reused image is listed once.
    tagged_paths = (
        img.get('data-local-path') for img in content_div.find_all('img')
    )
    images = list(dict.fromkeys(
        path for path in tagged_paths if path and os.path.exists(path)
    ))

    if images:
        print(f"\n🖼️  需要上传的图片 ({len(images)} 张):")
        for idx, img_path in enumerate(images, 1):
            print(f"   {idx}. {img_path}")

        # Persist the checklist next to the exported file.
        img_list_file = _image_list_path(output_file)
        with open(img_list_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{img_path}\n" for img_path in images)
        print(f"\n💾 图片清单已保存: {img_list_file}")

    return output_file


def main():
    """
    Command-line entry point for the Xiumi / 135editor exporter.

    Converts the HTML file given as the positional argument, or, when none is
    given, the most recently modified ``*.html`` file in ``./output``. The
    optional ``-o/--output`` value is forwarded as the destination path.
    Exits with status 1 when no candidate file is found or when the chosen
    file does not exist.
    """
    cli = argparse.ArgumentParser(description='导出为秀米/135编辑器格式')
    cli.add_argument('html_file', nargs='?', help='HTML 文件路径')
    cli.add_argument('-o', '--output', help='输出文件路径')
    options = cli.parse_args()

    html_file = options.html_file
    if not html_file:
        # No explicit input: fall back to the newest export in ./output.
        candidates = list(Path("./output").glob("*.html"))
        if not candidates:
            print("❌ 未找到 HTML 文件")
            sys.exit(1)

        newest = max(candidates, key=lambda path: path.stat().st_mtime)
        html_file = str(newest)
        print(f"📄 使用最新文章: {html_file}\n")

    if not os.path.exists(html_file):
        print(f"❌ 文件不存在: {html_file}")
        sys.exit(1)

    rule = "=" * 70

    print("\n" + rule)
    print("🎨 秀米/135编辑器格式导出工具")
    print(rule + "\n")

    html_to_xiumi_format(html_file, options.output)

    print("\n" + rule)
    print("🎉 导出完成！")
    print(rule + "\n")

# Script entry point: run the exporter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()