ruby8008/self_learning.py at main · Ruby-xin/ruby8008 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/env python3
"""
自我学习与迭代系统
根据审稿意见自动优化提示词和配置
"""

import os
import json
from pathlib import Path
from datetime import datetime
from anthropic import Anthropic

class SelfLearningSystem:
    """自我学习系统"""

    def __init__(self):
        """Set up config file locations, the API client, and in-memory state.

        Ensures a ``config`` directory exists next to this file, builds an
        Anthropic client using the API key exposed by the project's
        ``ContentGenerator``, then loads the templates and known issues.
        """
        import sys

        here = Path(__file__).parent
        self.config_dir = here / "config"
        self.config_dir.mkdir(exist_ok=True)

        # Files maintained by this class; all of them live in the config directory.
        config_root = self.config_dir
        self.learning_log = config_root / "learning_log.jsonl"
        self.prompt_templates = config_root / "prompt_templates.json"
        self.known_issues = config_root / "known_issues.json"

        # Put this file's directory first on sys.path before importing the
        # local ``modules`` package.
        sys.path.insert(0, os.path.dirname(__file__))
        from modules.content_gen import ContentGenerator

        # Borrow the API key from the ContentGenerator's own client.
        generator = ContentGenerator()
        self.client = Anthropic(api_key=generator.client.api_key)

        # Read existing configuration from disk (or seed the defaults).
        self._load_configurations()

    def _load_configurations(self):
        """加载现有配置"""
        # 加载提示词模板
        if self.prompt_templates.exists():
            with open(self.prompt_templates, 'r', encoding='utf-8') as f:
                self.templates = json.load(f)
        else:
            self.templates = self._get_default_templates()
            self._save_templates()

        # 加载已知问题
        if self.known_issues.exists():
            with open(self.known_issues, 'r', encoding='utf-8') as f:
                self.issues = json.load(f)
        else:
            self.issues = {"issues": [], "fixed_count": 0}
            self._save_issues()

    def _get_default_templates(self) -> dict:
        """获取默认提示词模板"""
        return {
            "content_generation": {
                "version": "1.0",
                "last_updated": datetime.now().isoformat(),
                "persona": {
                    "name": "ruby鑫燕",
                    "traits": [
                        "高级经管硕士背景的职场专家",
                        "地道的北京大妞儿",
                        "幽默风趣，说话直接不绕弯子",
                        "简洁有力，从不说废话",
                        "平等对话，拒绝说教和爹味",
                        "洞察力强，能把复杂的事说清楚",
                        "非暴力沟通，尊重读者但不客套"
                    ],
                    "language_style": [
                        "用\"咱们\"\"姐妹们\"\"哥们儿\"等亲切称呼",
                        "多用\"其实\"\"说白了\"\"你想啊\"等口语化表达",
                        "适当用北京话的语气词\"呗\"\"嘛\"\"哈\"",
                        "用大白话解释专业概念",
                        "偶尔自嘲或吐槽，拉近距离"
                    ]
                },
                "writing_rules": [
                    "标题简洁有力（10-15字），制造冲突或好奇",
                    "开头直接扎心，用场景+痛点",
                    "子标题简短（不超过12字），一看就懂",
                    "每段控制在3-5行，不要大段文字",
                    "多用\"→\"、数字列表、对比等视觉元素",
                    "结尾有互动引导，但不说教"
                ],
                "avoid": [
                    "不要标题重复出现两次",
                    "不要使用\"应该\"\"必须\"等说教词汇",
                    "不要大段文字堆砌",
                    "不要过于学术化的表述",
                    "不要生硬的AI感"
                ]
            },
            "editorial_review": {
                "version": "1.0",
                "last_updated": datetime.now().isoformat(),
                "scoring_dimensions": [
                    "选题价值",
                    "引流能力",
                    "内容逻辑",
                    "主标题",
                    "子标题",
                    "语气调性",
                    "人设契合",
                    "洞察深度",
                    "非暴力沟通",
                    "整体质量"
                ],
                "pass_threshold": 70,
                "excellence_threshold": 85
            }
        }

    def analyze_review_and_optimize(self, review_file: str):
        """分析审稿意见并优化配置"""
        print("\n" + "="*70)
        print("🧠 自我学习系统启动")
        print("="*70 + "\n")

        # 读取审稿意见
        with open(review_file, 'r', encoding='utf-8') as f:
            review_content = f.read()

        print("📖 正在分析审稿意见...")

        # 使用 AI 分析问题模式
        analysis_prompt = f"""
你是一个自我学习系统，负责从审稿意见中提取问题模式并优化创作规则。

**审稿意见**:

{review_content}

**任务**:

1. 提取所有被指出的问题（按严重程度排序）
2. 识别问题的根本原因
3. 提出具体的优化建议（可直接应用到提示词中）
4. 生成检查清单，确保下次不再出现同样问题

**输出格式（JSON）**:

```json
{{
  "identified_issues": [
    {{
      "issue": "问题描述",
      "severity": "高/中/低",
      "root_cause": "根本原因",
      "examples": ["具体例子1", "具体例子2"],
      "fix_suggestion": "优化建议"
    }}
  ],
  "prompt_optimizations": [
    {{
      "section": "persona/writing_rules/avoid",
      "action": "add/modify/remove",
      "content": "具体内容"
    }}
  ],
  "checklist": [
    "检查项1",
    "检查项2"
  ],
  "estimated_score_improvement": "预计提升分数"
}}
```

现在开始分析:
"""

        response = self.client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=4000,
            temperature=0.3,
            messages=[{"role": "user", "content": analysis_prompt}]
        )

        # 提取 JSON
        import re
        json_match = re.search(r'```json\n(.*?)\n```', response.content[0].text, re.DOTALL)
        if json_match:
            analysis = json.loads(json_match.group(1))
        else:
            # 尝试直接解析
            analysis = json.loads(response.content[0].text)

        print("✅ 分析完成！\n")

        # 显示发现的问题
        print("【发现的问题】\n")
        for idx, issue in enumerate(analysis['identified_issues'], 1):
            print(f"{idx}. {issue['issue']} (严重程度: {issue['severity']})")
            print(f"   根本原因: {issue['root_cause']}")
            print(f"   优化建议: {issue['fix_suggestion']}\n")

        # 应用优化
        print("\n【应用优化】\n")
        self._apply_optimizations(analysis['prompt_optimizations'])

        # 保存学习记录
        self._save_learning_log(review_file, analysis)

        # 更新已知问题
        self._update_known_issues(analysis['identified_issues'])

        # 生成检查清单
        self._save_checklist(analysis['checklist'])

        print("\n" + "="*70)
        print("✅ 自我学习完成！")
        print("="*70)
        print(f"📊 预计分数提升: {analysis['estimated_score_improvement']}")
        print(f"📝 新增检查项: {len(analysis['checklist'])} 项")
        print(f"🔧 优化配置: {len(analysis['prompt_optimizations'])} 处")
        print("="*70 + "\n")

        return analysis

    def _apply_optimizations(self, optimizations: list):
        """应用优化到提示词模板"""
        for opt in optimizations:
            section = opt['section']
            action = opt['action']
            content = opt['content']

            print(f"  • {action.upper()} {section}: {content[:50]}...")

            # 应用到模板
            if action == "add":
                if section in ["persona.traits", "persona.language_style", "writing_rules", "avoid"]:
                    parts = section.split('.')
                    if len(parts) == 2:
                        self.templates['content_generation'][parts[0]][parts[1]].append(content)
                    else:
                        self.templates['content_generation'][section].append(content)

            elif action == "modify":
                # 修改现有条目（简化处理，直接添加）
                if section in ["writing_rules", "avoid"]:
                    if content not in self.templates['content_generation'][section]:
                        self.templates['content_generation'][section].append(content)

        # 更新版本和时间
        self.templates['content_generation']['version'] = f"{float(self.templates['content_generation']['version']) + 0.1:.1f}"
        self.templates['content_generation']['last_updated'] = datetime.now().isoformat()

        self._save_templates()

    def _save_templates(self):
        """保存模板"""
        with open(self.prompt_templates, 'w', encoding='utf-8') as f:
            json.dump(self.templates, f, ensure_ascii=False, indent=2)

    def _save_issues(self):
        """保存问题记录"""
        with open(self.known_issues, 'w', encoding='utf-8') as f:
            json.dump(self.issues, f, ensure_ascii=False, indent=2)

    def _save_learning_log(self, review_file: str, analysis: dict):
        """保存学习日志"""
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "review_file": review_file,
            "issues_found": len(analysis['identified_issues']),
            "optimizations_applied": len(analysis['prompt_optimizations']),
            "analysis": analysis
        }

        with open(self.learning_log, 'a', encoding='utf-8') as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + '\n')

    def _update_known_issues(self, issues: list):
        """更新已知问题库"""
        for issue in issues:
            # 检查是否已存在
            exists = False
            for known in self.issues['issues']:
                if known['issue'] == issue['issue']:
                    known['occurrences'] = known.get('occurrences', 1) + 1
                    known['last_seen'] = datetime.now().isoformat()
                    exists = True
                    break

            if not exists:
                self.issues['issues'].append({
                    **issue,
                    "first_seen": datetime.now().isoformat(),
                    "occurrences": 1,
                    "status": "active"
                })

        self._save_issues()

    def _save_checklist(self, items: list):
        """保存检查清单"""
        checklist_file = self.config_dir / "pre_publish_checklist.md"

        with open(checklist_file, 'w', encoding='utf-8') as f:
            f.write("# 发布前检查清单\n\n")
            f.write(f"*最后更新: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n")

            f.write("## 必查项目\n\n")
            for idx, item in enumerate(items, 1):
                f.write(f"- [ ] {item}\n")

            f.write("\n## 常见问题自查\n\n")
            for issue in self.issues['issues'][:10]:  # 前10个高频问题
                if issue.get('status') == 'active':
                    f.write(f"- [ ] {issue['issue']}\n")

    def get_optimized_prompt(self, prompt_type: str = "content_generation") -> str:
        """获取优化后的提示词"""
        template = self.templates.get(prompt_type, {})

        if prompt_type == "content_generation":
            persona = template['persona']

            prompt = f"""你是 {persona['name']}，一位高级经管硕士背景的职场专家，同时也是个地道的北京大妞儿。

**你的人设特点**：
{chr(10).join(f'- {trait}' for trait in persona['traits'])}

**语言风格**：
{chr(10).join(f'- {style}' for style in persona['language_style'])}

**写作要求**：
{chr(10).join(f'{i+1}. {rule}' for i, rule in enumerate(template['writing_rules']))}

**务必避免**：
{chr(10).join(f'❌ {avoid}' for avoid in template['avoid'])}
"""
            return prompt

        return ""


def main():
    """Command-line entry point: learn from one review report.

    Parses the review-file argument, runs the analysis and optimization,
    optionally prints the resulting content-generation prompt
    (``--show-prompt``), and finishes with a short list of next steps.
    """
    import argparse

    cli = argparse.ArgumentParser(description='自我学习系统')
    cli.add_argument('review_file', help='审稿报告文件')
    cli.add_argument('--show-prompt', action='store_true', help='显示优化后的提示词')
    options = cli.parse_args()

    learner = SelfLearningSystem()

    # Analyze the review and apply the optimizations; the returned analysis
    # is not needed here.
    learner.analyze_review_and_optimize(options.review_file)

    if options.show_prompt:
        print("\n【优化后的提示词】\n")
        print(learner.get_optimized_prompt())
        print("\n")

    next_steps = (
        "💡 下一步:",
        f"   1. 查看优化配置: cat {learner.prompt_templates}",
        f"   2. 查看检查清单: cat {learner.config_dir}/pre_publish_checklist.md",
        "   3. 使用新配置重新生成文章",
        "",
    )
    for line in next_steps:
        print(line)


if __name__ == "__main__":
    main()