├── .env.example
├── .gitignore
├── README.md
├── config.json.example
├── data
    ├── cache
    │   └── .gitkeep
    ├── logs
    │   └── .gitkeep
    ├── output
    │   └── .gitkeep
    └── reference
        └── .gitkeep
├── main.py
├── requirements.txt
└── src
    ├── config
        ├── ai_config.py
        └── config.py
    ├── generators
        ├── common
        │   ├── data_structures.py
        │   └── utils.py
        ├── content
        │   ├── __init__.py
        │   ├── consistency_checker.py
        │   ├── content_generator.py
        │   └── validators.py
        ├── finalizer
        │   └── finalizer.py
        ├── outline
        │   └── outline_generator.py
        ├── prompts.py
        └── title_generator.py
    ├── knowledge_base
        └── knowledge_base.py
    ├── models
        ├── __init__.py
        ├── base_model.py
        ├── gemini_model.py
        └── openai_model.py
    └── tools
        ├── generate_config.py
        └── generate_marketing.py


/.env.example:
--------------------------------------------------------------------------------
 1 | # Gemini API配置
 2 | GEMINI_API_KEY=
 3 | 
 4 | # 嵌入模型配置
 5 | OPENAI_EMBEDDING_API_KEY=
 6 | OPENAI_EMBEDDING_API_BASE=https://api.siliconflow.cn/v1
 7 | 
 8 | # 大纲模型配置
 9 | OPENAI_OUTLINE_API_KEY=
10 | OPENAI_OUTLINE_API_BASE=https://api.siliconflow.cn/v1
11 | 
12 | # 内容模型配置
13 | OPENAI_CONTENT_API_KEY=
14 | OPENAI_CONTENT_API_BASE=https://api.siliconflow.cn/v1
15 | 
16 | # 超时和重试配置
17 | GEMINI_TIMEOUT=300
18 | GEMINI_MAX_RETRIES=3
19 | GEMINI_RETRY_DELAY=90
20 | OPENAI_CONTENT_TIMEOUT=180
21 | OPENAI_OUTLINE_TIMEOUT=120
22 | OPENAI_EMBEDDING_TIMEOUT=60
23 | OPENAI_RETRY_DELAY=15
24 | 
25 | # ==============================================
26 | # 火山引擎DeepSeek-V3.1配置
27 | # ==============================================
28 | 
29 | # 火山引擎API密钥（必填）
30 | # 从火山引擎控制台获取：https://console.volcengine.com/ark/
31 | VOLCENGINE_API_KEY=
32 | 
33 | # API端点（默认值已经是正确的，通常不需要修改）
34 | VOLCENGINE_API_ENDPOINT=https://ark.cn-beijing.volces.com/api/v3
35 | 
36 | # 大纲生成模型配置
37 | VOLCENGINE_OUTLINE_MODEL_ID=deepseek-v3-1-250821
38 | VOLCENGINE_OUTLINE_TEMPERATURE=1.0
39 | 
40 | # 内容生成模型配置
41 | VOLCENGINE_CONTENT_MODEL_ID=deepseek-v3-1-250821
42 | VOLCENGINE_CONTENT_TEMPERATURE=0.7
43 | 
44 | # 深度思考模式（推荐开启以提升生成质量）
45 | VOLCENGINE_THINKING_ENABLED=true
46 | 
47 | # 请求超时时间（秒）
48 | VOLCENGINE_TIMEOUT=120
49 | 
50 | # 最大输出token数
51 | VOLCENGINE_MAX_TOKENS=8192
52 | 
53 | # 重试延迟（秒）
54 | VOLCENGINE_RETRY_DELAY=15
55 | 
56 | # 最大重试次数
57 | VOLCENGINE_MAX_RETRIES=3
58 | 
59 | # 是否启用备用模型（推荐开启）
60 | VOLCENGINE_FALLBACK_ENABLED=true


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python虚拟环境
 2 | venv/
 3 | env/
 4 | .venv/
 5 | 
 6 | # Python缓存文件
 7 | __pycache__/
 8 | *.py[cod]
 9 | *$py.class
10 | .pytest_cache/
11 | 
12 | # 分发/打包
13 | dist/
14 | build/
15 | *.egg-info/
16 | 
17 | # IDE配置
18 | .idea/
19 | .vscode/
20 | *.swp
21 | *.swo
22 | .claude/
23 | .cursor/
24 | .kiro/
25 | .codebuddy/
26 | .trae/
27 | .qoder/
28 | 
29 | # 项目特定文件
30 | data/
31 | src/gui/
32 | !data/*/.gitkeep
33 | config*.json
34 | .cursor/rules/*
35 | *.log
36 | web_ui/
37 | backup/
38 | .promptx/
39 | backup/
40 | tests/
41 | docs/
42 | examples/
43 | scripts/
44 | *_gui.*
45 | *
46 | # 环境变量
47 | .env
48 | !.env.example
49 | 
50 | # 系统文件
51 | .DS_Store
52 | Thumbs.db
53 | .cursorignore
54 | .cursorrules
55 | .qoderignore


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # OCNovel - AI小说生成系统
  2 | 
  3 | 一个基于Python的AI小说自动生成系统，支持东方玄幻、仙侠、武侠等多种类型的小说创作。系统采用模块化设计，集成了多种AI模型接口，提供从大纲生成到章节内容创作的全流程自动化。
  4 | 
  5 | ## 项目结构
  6 | 
  7 | ```
  8 | OCNovel/
  9 | ├── main.py                    # 主程序入口
 10 | ├── config.json.example        # 配置文件模板
 11 | ├── config.json               # 配置文件（基于模板生成）
 12 | ├── requirements.txt          # Python依赖包列表
 13 | ├── .env.example              # 环境变量模板
 14 | ├── .env                      # 环境变量配置
 15 | ├── README.md                 # 项目说明文档
 16 | ├── .gitignore               # Git忽略配置
 17 | │
 18 | ├── src/                      # 源代码目录
 19 | │   ├── config/               # 配置管理模块
 20 | │   │   ├── ai_config.py      # AI模型配置管理
 21 | │   │   └── config.py         # 通用配置管理
 22 | │   │
 23 | │   ├── generators/           # 内容生成器模块
 24 | │   │   ├── common/           # 通用数据结构和工具
 25 | │   │   │   ├── data_structures.py  # 数据结构定义
 26 | │   │   │   └── utils.py      # 通用工具函数
 27 | │   │   │
 28 | │   │   ├── content/          # 章节内容生成
 29 | │   │   │   ├── content_generator.py    # 内容生成器
 30 | │   │   │   ├── consistency_checker.py # 一致性检查器
 31 | │   │   │   └── validators.py          # 内容验证器
 32 | │   │   │
 33 | │   │   ├── outline/          # 大纲生成
 34 | │   │   │   └── outline_generator.py   # 大纲生成器
 35 | │   │   │
 36 | │   │   ├── finalizer/        # 内容最终化处理
 37 | │   │   │   └── finalizer.py           # 内容最终化器
 38 | │   │   │
 39 | │   │   ├── models.py         # 生成模型定义
 40 | │   │   ├── prompts.py        # 提示词模板
 41 | │   │   ├── title_generator.py        # 标题生成器
 42 | │   │   └── humanization_prompts.py  # 人性化提示词
 43 | │   │
 44 | │   ├── models/               # AI模型接口
 45 | │   │   ├── base_model.py     # 基础模型抽象类
 46 | │   │   ├── gemini_model.py   # Google Gemini模型接口
 47 | │   │   └── openai_model.py   # OpenAI模型接口
 48 | │   │
 49 | │   ├── knowledge_base/       # 知识库模块
 50 | │   │   └── knowledge_base.py # 知识库管理
 51 | │   │
 52 | │   └── tools/                # 工具和辅助功能
 53 | │       ├── generate_config.py       # 配置生成工具
 54 | │       └── generate_marketing.py     # 营销内容生成
 55 | │
 56 | └── data/                     # 数据目录
 57 |     ├── cache/                # 缓存数据存储
 58 |     ├── logs/                 # 日志文件存储
 59 |     ├── output/               # 生成内容输出目录
 60 |     ├── marketing/            # 营销内容存储
 61 |     ├── reference/            # 参考资料存储
 62 |     └── style_sources/        # 风格源文件存储
 63 | ```
 64 | 
 65 | ## 核心模块功能
 66 | 
 67 | ### 1. 配置管理模块 (`src/config/`)
 68 | - **config.py**: 统一的配置管理类，支持环境变量加载和敏感信息过滤
 69 | - **ai_config.py**: AI模型配置管理，支持多模型切换和配置
 70 | 
 71 | ### 2. 内容生成模块 (`src/generators/`)
 72 | - **outline_generator.py**: 小说大纲生成器，支持章节结构规划
 73 | - **content_generator.py**: 章节内容生成器，集成一致性检查和验证
 74 | - **consistency_checker.py**: 内容一致性检查，确保情节连贯
 75 | - **validators.py**: 内容验证器，检查逻辑和重复内容
 76 | - **finalizer.py**: 内容最终化处理，优化输出质量
 77 | 
 78 | ### 3. AI模型接口 (`src/models/`)
 79 | - **base_model.py**: 基础模型抽象类，定义统一接口
 80 | - **gemini_model.py**: Google Gemini模型接口实现
 81 | - **openai_model.py**: OpenAI模型接口实现
 82 | - 支持多模型切换和备用模型机制
 83 | 
 84 | ### 4. 知识库模块 (`src/knowledge_base/`)
 85 | - **knowledge_base.py**: 知识库管理，支持参考文件加载和缓存
 86 | - 支持文本分块和语义搜索功能
 87 | 
 88 | ## 环境配置
 89 | - Python 3.9+
 90 | - 支持的AI模型API密钥（OpenAI、Gemini、VolcEngine等）
 91 | - 配置文件 `config.json`（基于 `config.json.example` 生成）
 92 | - 环境变量文件 `.env`（包含API密钥等敏感信息）
 93 | 
 94 | ## 基础使用示例
 95 | 
 96 | ### 1. 安装依赖
 97 | ```bash
 98 | pip install -r requirements.txt
 99 | ```
100 | 
101 | ### 2. 配置设置
102 | 复制配置文件模板并修改：
103 | ```bash
104 | cp config.json.example config.json
105 | cp .env.example .env
106 | ```
107 | 
108 | 编辑 `config.json` 配置小说参数：
109 | ```json
110 | {
111 |     "novel_config": {
112 |         "type": "东方玄幻",
113 |         "theme": "凡人流、成长、冒险",
114 |         "title": "牧神记",
115 |         "target_chapters": 100,
116 |         "chapter_length": 2500
117 |     }
118 | }
119 | ```
120 | 
121 | 编辑 `.env` 配置API密钥：
122 | ```bash
123 | GEMINI_API_KEY=your_gemini_api_key
124 | VOLCENGINE_API_KEY=your_volcengine_api_key
125 | OPENAI_EMBEDDING_API_KEY=your_embedding_api_key
126 | # 其余变量（OPENAI_OUTLINE_API_KEY、OPENAI_CONTENT_API_KEY 等）请参照 .env.example
127 | ```
128 | 
129 | ### 3. 运行小说生成
130 | ```bash
131 | # 生成小说大纲（第1-10章）
132 | python main.py outline --start 1 --end 10
133 | 
134 | # 生成指定章节内容（重新生成第5章）
135 | python main.py content --target-chapter 5
136 | 
137 | # 生成章节内容（从第3章开始继续）
138 | python main.py content --start-chapter 3
139 | 
140 | # 处理章节定稿（处理第8章）
141 | python main.py finalize --chapter 8
142 | 
143 | # 自动执行完整流程（大纲+内容+定稿）
144 | python main.py auto
145 | 
146 | # 强制重新生成所有大纲后执行完整流程
147 | python main.py auto --force-outline
148 | 
149 | # 仿写文本（基于风格范文）
150 | python main.py imitate --style-source data/style_sources/范文.txt --input-file data/input/原始文本.txt --output-file data/output/仿写结果.txt
151 | 
152 | # 使用额外提示词
153 | python main.py outline --start 1 --end 5 --extra-prompt "增加悬疑元素"
154 | python main.py content --extra-prompt "增加人物对话"
155 | python main.py auto --extra-prompt "保持轻松幽默风格"
156 | ```
157 | 
158 | ## 配置说明
159 | 
160 | ### 主要配置项
161 | - **knowledge_base_config**: 知识库配置（分块大小、重叠、缓存目录）
162 | - **log_config**: 日志配置（目录、级别、格式）
163 | - **novel_config**: 小说配置（类型、主题、风格、标题、目标章节数）
164 | - **generation_config**: 生成配置（重试次数、批量大小、模型选择）
165 | - **output_config**: 输出配置（格式、编码、保存选项）
166 | - **imitation_config**: 风格模仿配置（启用状态、风格源文件）
167 | 
168 | ### 支持的小说类型
169 | - 东方玄幻
170 | - 仙侠修真  
171 | - 武侠江湖
172 | - 都市异能
173 | - 历史架空
174 | - 科幻奇幻
175 | 
176 | ## 开发说明
177 | 
178 | ### 架构设计原则
179 | 1. **分层架构**: 表示层、业务逻辑层、服务层、数据层分离
180 | 2. **模块化设计**: 各功能模块独立，便于扩展和维护
181 | 3. **统一接口**: AI模型统一接口设计，支持多模型切换
182 | 4. **错误处理**: 完善的错误处理和重试机制
183 | 5. **配置管理**: 统一的配置管理和环境变量支持


--------------------------------------------------------------------------------
/config.json.example:
--------------------------------------------------------------------------------
  1 | {
  2 |     "knowledge_base_config": {
  3 |       "reference_files": [],
  4 |       "chunk_size": 1200,
  5 |       "chunk_overlap": 300,
  6 |       "cache_dir": "data/cache"
  7 |     },
  8 |     "log_config": {
  9 |       "log_dir": "data/logs",
 10 |       "log_level": "DEBUG",
 11 |       "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 12 |     },
 13 |     "novel_config": {
 14 |       "type": "东方玄幻",
 15 |       "theme": "凡人流、成长、冒险",
 16 |       "style": "热血、幽默、悬疑",
 17 |       "title": "牧神记",
 18 |       "target_chapters": 100,
 19 |       "chapter_length": 2500,
 20 |       "knowledge_base": {
 21 |         "chunk_size": 1200,
 22 |         "chunk_overlap": 300,
 23 |         "cache_dir": "data/cache"
 24 |       },
 25 |       "writing_guide": {
 26 |         "world_building": {
 27 |           "magic_system": "修炼体系基于开启人体'七大神藏'：灵胎、五曜、六合、七星、天人、生死、神桥。每个神藏间有'壁'阻隔，需'破壁'方可晋升。天生灵胎壁开启者为'灵体'（分青龙、白虎、朱雀、玄武四种），修炼事半功倍。主角秦牧为'凡体'，被村长善意欺骗为万古无一的'霸体'，以最基础的'导引功'（霸体三丹功）打下无上根基。'神通'是将武技或法术修炼到极致的体现，如刀出火、拳出雷。",
 28 |           "social_system": "世界分为两部分：1) 大墟：被诅咒的遗弃之地，'天黑别出门'是铁律，黑暗中潜藏致命危险，村落依靠神秘石像庇护。2) 外部世界：以'延康国'为代表，城市繁华，门派林立，如漓江派、天魔教、大雷音寺等。大墟中的居民被称为'神之弃民'，被外界排斥。",
 29 |           "background": "故事始于大墟中的'残老村'，一群实力强大但身有残疾的神秘村民，从江边捡到了主角秦牧。大墟的黑暗、神秘的石像、以及主角的身世（无忧乡）是故事初期的核心谜团。"
 30 |         },
 31 |         "character_guide": {
 32 |           "protagonist": {
 33 |             "background": "身世不明的弃婴，被残老村村民抚养长大，佩戴一枚刻有'秦'字的神秘玉佩。体质为普通'凡体'，却被告知是万古无一的'霸体'。",
 34 |             "initial_personality": "淳朴善良，聪慧好学，对抚养自己的长辈充满信赖与孝心。在村民的'言传身教'下，行事逐渐果决，带有一丝'反派'的狡黠与狠辣。",
 35 |             "growth_path": "从修炼基础导引功开始，通过服用'四灵血'和村民的魔鬼训练淬炼肉身与元气，在生死历练中将各项战技（刀、拳、腿、眼、法）推向极致，意外破开灵胎壁，并成为天魔教少教主，最终踏出大墟，走向更广阔的世界。"
 36 |           },
 37 |         "supporting_roles": [
 38 |             {
 39 |               "role_type": "导师/亲人",
 40 |               "personality": "司婆婆：残老村的裁缝与稳婆，天魔教前教主夫人。看似慈祥的驼背老妪，实则魔功盖世，传授秦牧'天魔造化功'。外冷内热，视秦牧如己出。",
 41 |               "relationship": "秦牧的养母，第一个发现并救下他的人，也是其魔道功法的启蒙者。"
 42 |             },
 43 |             {
 44 |               "role_type": "导师/亲人",
 45 |               "personality": "村长：残老村的灵魂人物，无手无脚，智慧深不可测。'霸体'谎言的缔造者，传授秦牧'霸体三丹功'和无上剑法'剑图'。实为上一个时代的剑神。",
 46 |               "relationship": "秦牧的精神导师和领路人，为秦牧规划了成长道路。"
 47 |             },
 48 |             {
 49 |               "role_type": "导师/亲人",
 50 |               "personality": "瘸子：独腿，看似憨厚老实，实则心思缜密，下手狠辣。传授秦牧'偷天神腿'和为人处世的'狡猾'。实为天下第一神偷。",
 51 |               "relationship": "教导秦牧身法与生存智慧，是秦牧'反派'气质的来源之一。"
 52 |             },
 53 |             {
 54 |               "role_type": "导师/亲人",
 55 |               "personality": "马爷：独臂，性格沉稳，不苟言笑。传授秦牧'雷音八式'。实为大雷音寺弃徒'马王神'。",
 56 |               "relationship": "教导秦牧正统的战技与刚猛的战斗风格。"
 57 |             },
 58 |             {
 59 |               "role_type": "导师/亲人",
 60 |               "personality": "瞎子：眼盲，性格骚情，爱拽文。传授秦牧'九重天开眼法'和枪法。实为一代枪神。",
 61 |               "relationship": "为秦牧开启了超越肉眼的视野，教导他看破虚妄。"
 62 |             },
 63 |             {
 64 |               "role_type": "导师/亲人",
 65 |               "personality": "屠夫：只有上半身，性格癫狂豪迈。传授秦牧'杀猪刀法'。实为'天刀'。",
 66 |               "relationship": "塑造了秦牧勇猛无畏、一往无前的战斗意志。"
 67 |             }
 68 |           ],
 69 |           "antagonists": [
 70 |             {
 71 |               "role_type": "初期反派",
 72 |               "personality": "漓江派众人：自诩正道，前来大墟降妖除魔，实则傲慢自大，滥杀无辜。其弟子（曲师兄、晴师姐等）成为秦牧初期的生死磨刀石。",
 73 |               "conflict_point": "追杀秦牧，迫使其在生死间成长，并引出残老村的强大实力。"
 74 |             },
 75 |             {
 76 |               "role_type": "区域BOSS",
 77 |               "personality": "吴女/小雷音寺铜佛：被镇压在江心古庙的强大妖怪与邪佛，代表了大墟中诡异的危险。",
 78 |               "conflict_point": "秦牧利用其与邪佛的矛盾，意外破开灵胎壁，并展现了智取强敌的能力。"
 79 |             },
 80 |             {
 81 |               "role_type": "亦敌亦友",
 82 |               "personality": "天魔教众人：包括少年祖师、执法长老及三百六十堂堂主。行事亦正亦邪，遵循'强者为尊'的魔道逻辑。",
 83 |               "conflict_point": "为寻回教主夫人（司婆婆）而来，对秦牧进行严酷考验，最终认可其为少教主。"
 84 |             },
 85 |             {
 86 |               "role_type": "潜在威胁",
 87 |               "personality": "延康国：以国家形式存在的超级门派，由'神下第一人'延康国师主导，意图将大墟纳入版图。其军队纪律严明，装备精良，代表了外界文明的强大压迫力。",
 88 |               "conflict_point": "与残老村众人产生冲突，展现了外界势力的冰山一角，是主角未来将要面对的巨大挑战。"
 89 |             }
 90 |           ]
 91 |         },
 92 |         "plot_structure": {
 93 |           "act_one": {
 94 |             "setup": "大墟残老村，少年秦牧在九位残疾长辈的抚养下长大，学习各种奇怪的本领。",
 95 |             "inciting_incident": "秦牧被检测为无法修炼的'凡体'，村长为给予村民希望，谎称其为万古无一的'霸体'，并开始对其进行地狱式训练。",
 96 |             "first_plot_point": "秦牧初出茅庐，遭遇漓江派弟子的追杀，第一次经历生死搏杀，并成功反杀，展现了惊人的战斗天赋和村民们所教技艺的强大。"
 97 |           },
 98 |           "act_two": {
 99 |             "rising_action": "秦牧在历练中不断成长，与魔猿结为好友，智取吴女和镇央宫魔影，探索涌江龙宫，并意外以'魔音'破开灵胎壁，正式踏上修炼之路。",
100 |             "midpoint": "天魔教寻至残老村，秦牧的'霸体'身份面临真正考验。他接受了天魔教三百六十房的挑战，这是一场持续一天一夜的极限战斗。",
101 |             "complications": "秦牧在战斗中不断突破，元气属性相继觉醒（水、火、青龙），其实力让天魔教众人震惊。司婆婆的真实身份（前教主夫人）被揭露。",
102 |             "darkest_moment": "在挑战的最后阶段，秦牧身心俱疲，濒临极限，但他凭借坚定的意志和守护婆婆的信念坚持了下来。",
103 |             "second_plot_point": "秦牧成功打穿三百六十房，其潜力与实力得到天魔教祖师的认可，被正式册封为'少教主'。"
104 |           },
105 |           "act_three": {
106 |             "climax": "延康国大军入侵大墟，兵锋直指镶龙城。残老村众人各显神通，聋子作画阻挡百万军，屠夫、瞎子、瘸子、药师等人纷纷出手示威，展现了残老村恐怖的实力。",
107 |             "resolution": "神秘的'太阳船'出现，其无与伦比的威压迫使延康国师下令退兵，化解了镶龙城的危机。秦牧也因此见识到了大墟更深层次的秘密。",
108 |             "denouement": "村长等人同意秦牧外出历练，秦牧通过了九位长辈的最终考验，正式'成年'。他告别村民，与狐灵儿一同踏上前往延康国的旅程，开启新的篇章。"
109 |           }
110 |         },
111 |         "style_guide": {
112 |           "tone": "整体风格热血、爽快，同时带有浓厚的神秘感和一丝幽默。主角的成长与大墟的秘密探索交织，营造出一种独特的史诗感。",
113 |           "pacing": "节奏明快，事件紧凑。采用单元剧式的冒险结构（如漓江派、天魔教、延康国），每个单元都推动主角实力和心智的成长，并揭示一部分世界观。",
114 |           "description_focus": [
115 |             "战斗描写：注重招式、神通的奇特性和力量感，如'雷音八式'的雷声滚滚，'杀猪刀法'的癫狂霸气，'剑图'的山河壮阔。",
116 |             "世界观塑造：通过秦牧的视角，逐步揭开'大墟'的神秘面纱，如黑暗中的怪物、各种诡异的遗迹（神女关、镇央宫、涌江龙宫）、以及太阳船、月亮船等超乎想象的存在。",
117 |             "人物刻画：残老村村民的形象塑造尤为出色，每个人都有着神秘的过去和独特的性格，他们之间的互动和对秦牧的教导是前期核心看点。"
118 |           ]
119 |         }
120 |       }
121 |     },
122 |     "generation_config": {
123 |       "max_retries": 3,
124 |       "retry_delay": 30,
125 |       "batch_size": 10,
126 |       "model_timeout": 60,
127 |       "max_tokens": 65536,
128 |       "force_rebuild_kb": false,
129 |       "model_selection": {
130 |         "outline": {
131 |           "provider": "volcengine",
132 |           "model_type": "outline"
133 |         },
134 |         "content": {
135 |           "provider": "volcengine",
136 |           "model_type": "content"
137 |         }
138 |       },
139 |       "validation": {
140 |         "check_logic": true,
141 |         "check_consistency": true,
142 |         "check_duplicates": true
143 |       },
144 |       "humanization": {
145 |         "temperature": 0.8,
146 |         "top_p": 0.9,
147 |         "dialogue_ratio": 0.4,
148 |         "description_simplification": true,
149 |         "emotion_enhancement": true
150 |       }
151 |     },
152 |     "output_config": {
153 |       "format": "txt",
154 |       "encoding": "utf-8",
155 |       "save_outline": true,
156 |       "save_character_states": true,
157 |       "output_dir": "data/output_mgs"
158 |     },
159 |     "imitation_config": {
160 |       "enabled": false,
161 |       "auto_imitation": {
162 |         "enabled": false,
163 |         "trigger_all_chapters": false,
164 |         "style_sources": [
165 |           {
166 |             "name": "古风雅致",
167 |             "file_path": "data/style_sources/古风风格参考.txt",
168 |             "description": "古风雅致的文风，适合玄幻仙侠类小说",
169 |             "extra_prompt": "保持古风韵味，增加诗词意境"
170 |           },
171 |           {
172 |             "name": "悬疑紧张",
173 |             "file_path": "data/style_sources/悬疑风格参考.txt", 
174 |             "description": "悬疑紧张的氛围，适合推理惊悚类小说",
175 |             "extra_prompt": "营造悬疑氛围，增加紧张感"
176 |           },
177 |           {
178 |             "name": "热血激昂",
179 |             "file_path": "data/style_sources/热血风格参考.txt",
180 |             "description": "热血激昂的节奏，适合战斗冒险类小说", 
181 |             "extra_prompt": "保持热血节奏，增强战斗场面；注意仅模仿语言风格，不得模仿内容或人物"
182 |           },
183 |           {
184 |             "name": "历史权谋",
185 |             "file_path": "data/style_sources/历史权谋风格参考.txt",
186 |             "description": "历史权谋、朝堂斗争、智谋博弈，适合历史、权谋类小说",
187 |             "extra_prompt": "突出智谋、权力斗争、历史厚重感；注意仅模仿语言风格，不得模仿内容或人物"
188 |           },
189 |           {
190 |             "name": "现代权谋",
191 |             "file_path": "data/style_sources/现代权谋风格参考.txt",
192 |             "description": "现代都市中的权力、商战、职场博弈，适合现代权谋、都市精英类小说",
193 |             "extra_prompt": "突出大国博弈、现代社会权力运作；注意仅模仿语言风格，不得模仿内容或人物"
194 |           },
195 |           {
196 |             "name": "都市轻松",
197 |             "file_path": "data/style_sources/都市轻松风格参考.txt",
198 |             "description": "轻松幽默、日常温馨，适合都市生活、轻喜剧类小说",
199 |             "extra_prompt": "营造轻松氛围，增加幽默和温情；注意仅模仿语言风格，不得模仿内容或人物"
200 |           },
201 |           {
202 |             "name": "科幻冒险",
203 |             "file_path": "data/style_sources/科幻冒险风格参考.txt",
204 |             "description": "未来科技、宇宙冒险、异星探索，适合科幻、冒险类小说",
205 |             "extra_prompt": "突出科技感、冒险精神、世界观新奇；注意仅模仿语言风格，不得模仿内容或人物"
206 |           },
207 |           {
208 |             "name": "青春校园",
209 |             "file_path": "data/style_sources/青春校园风格参考.txt",
210 |             "description": "青春成长、友情爱情、校园生活，适合青春校园、成长类小说",
211 |             "extra_prompt": "突出青春气息、成长烦恼、校园氛围；注意仅模仿语言风格，不得模仿内容或人物"
212 |           }
213 |         ],
214 |         "default_style": "古风雅致",
215 |         "output_suffix": "_imitated",
216 |         "backup_original": false
217 |       },
218 |       "manual_imitation": {
219 |         "enabled": true,
220 |         "default_output_dir": "data/imitation_output",
221 |         "temp_kb_cache_dir": "data/cache/imitation_cache"
222 |       },
223 |       "quality_control": {
224 |         "min_style_similarity": 0.7,
225 |         "max_retries": 3,
226 |         "content_preservation_check": true,
227 |         "style_consistency_check": true
228 |       }
229 |     }
230 | }


--------------------------------------------------------------------------------
/data/cache/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjiazhu/OCNovel/4e2ece92f8c80fc9880cfa44cfb32169570f2ec9/data/cache/.gitkeep


--------------------------------------------------------------------------------
/data/logs/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjiazhu/OCNovel/4e2ece92f8c80fc9880cfa44cfb32169570f2ec9/data/logs/.gitkeep


--------------------------------------------------------------------------------
/data/output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjiazhu/OCNovel/4e2ece92f8c80fc9880cfa44cfb32169570f2ec9/data/output/.gitkeep


--------------------------------------------------------------------------------
/data/reference/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjiazhu/OCNovel/4e2ece92f8c80fc9880cfa44cfb32169570f2ec9/data/reference/.gitkeep


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import json
  4 | import shutil
  5 | import subprocess
  6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  7 | import argparse
  8 | import logging
  9 | from typing import Optional, Tuple
 10 | from src.config.config import Config
 11 | from src.models.gemini_model import GeminiModel
 12 | from src.models.openai_model import OpenAIModel
 13 | from src.knowledge_base.knowledge_base import KnowledgeBase
 14 | from src.generators.outline.outline_generator import OutlineGenerator
 15 | from src.generators.content.content_generator import ContentGenerator
 16 | from src.generators.finalizer.finalizer import NovelFinalizer
 17 | from src.generators.common.utils import setup_logging
 18 | 
 19 | def init_workspace():
 20 |     """初始化工作目录"""
 21 |     # 创建必要的目录
 22 |     dirs = [
 23 |         "data/cache",
 24 |         "data/output",
 25 |         "data/logs",
 26 |         "data/reference"
 27 |     ]
 28 |     for dir_path in dirs:
 29 |         os.makedirs(dir_path, exist_ok=True)
 30 |         
 31 |     # 创建.gitkeep文件
 32 |     for dir_path in dirs:
 33 |         gitkeep_file = os.path.join(dir_path, ".gitkeep")
 34 |         if not os.path.exists(gitkeep_file):
 35 |             with open(gitkeep_file, 'w') as f:
 36 |                 pass
 37 | 
 38 | def create_model(model_config: dict):
 39 |     """创建AI模型实例"""
 40 |     if model_config["type"] == "gemini":
 41 |         return GeminiModel(model_config)
 42 |     elif model_config["type"] == "openai":
 43 |         return OpenAIModel(model_config)
 44 |     elif model_config["type"] == "volcengine":
 45 |         return OpenAIModel(model_config)  # 复用OpenAI兼容实现
 46 |     else:
 47 |         raise ValueError(f"不支持的模型类型: {model_config['type']}")
 48 | 
 49 | def main():
 50 |     # 初始化工作目录
 51 |     init_workspace()
 52 |     
 53 |     parser = argparse.ArgumentParser(description='小说生成工具')
 54 |     parser.add_argument('--config', type=str, default="config.json", help='配置文件路径')
 55 |     
 56 |     subparsers = parser.add_subparsers(dest='command', help='可用命令')
 57 |     
 58 |     # 大纲生成命令
 59 |     outline_parser = subparsers.add_parser('outline', help='生成小说大纲')
 60 |     outline_parser.add_argument('--start', type=int, required=True, help='起始章节')
 61 |     outline_parser.add_argument('--end', type=int, required=True, help='结束章节')
 62 |     outline_parser.add_argument('--novel-type', type=str, help='小说类型（可选，默认使用配置文件中的设置）')
 63 |     outline_parser.add_argument('--theme', type=str, help='主题（可选，默认使用配置文件中的设置）')
 64 |     outline_parser.add_argument('--style', type=str, help='写作风格（可选，默认使用配置文件中的设置）')
 65 |     outline_parser.add_argument('--extra-prompt', type=str, help='额外提示词')
 66 |     
 67 |     # 内容生成命令
 68 |     content_parser = subparsers.add_parser('content', help='生成章节内容')
 69 |     content_parser.add_argument('--start-chapter', type=int, help='起始章节号')
 70 |     content_parser.add_argument('--target-chapter', type=int, help='指定要重新生成的章节号')
 71 |     content_parser.add_argument('--extra-prompt', type=str, help='额外提示词')
 72 |     
 73 |     # 定稿处理命令
 74 |     finalize_parser = subparsers.add_parser('finalize', help='处理章节定稿')
 75 |     finalize_parser.add_argument('--chapter', type=int, required=True, help='要处理的章节号')
 76 |     
 77 |     # 自动生成命令（包含完整流程）
 78 |     auto_parser = subparsers.add_parser('auto', help='自动执行完整生成流程')
 79 |     auto_parser.add_argument('--extra-prompt', type=str, help='额外提示词')
 80 |     auto_parser.add_argument('--force-outline', action='store_true', help='强制重新生成所有大纲')
 81 | 
 82 |     # 仿写命令
 83 |     imitate_parser = subparsers.add_parser('imitate', help='根据指定的风格范文仿写文本')
 84 |     imitate_parser.add_argument('--style-source', type=str, required=True, help='作为风格参考的源文件路径')
 85 |     imitate_parser.add_argument('--input-file', type=str, required=True, help='需要进行仿写的原始文本文件路径')
 86 |     imitate_parser.add_argument('--output-file', type=str, required=True, help='仿写结果的输出文件路径')
 87 |     imitate_parser.add_argument('--extra-prompt', type=str, help='额外的仿写要求')
 88 |     
 89 |     args = parser.parse_args()
 90 |     
 91 |     # --- 检查并可能生成默认配置文件 ---
 92 |     config_path = args.config
 93 |     if config_path == "config.json" and not os.path.exists(config_path):
 94 |         print(f"默认配置文件 '{config_path}' 不存在。")
 95 |         try:
 96 |             user_theme = input("请输入您的小说主题以生成新的配置文件: ")
 97 |             if not user_theme:
 98 |                 print("未输入主题，无法生成配置文件。程序退出。")
 99 |                 sys.exit(1)
100 | 
101 |             # 获取 generate_config.py 脚本的绝对路径
102 |             script_dir = os.path.dirname(os.path.abspath(__file__))
103 |             generate_script_path = os.path.join(script_dir, "src", "tools", "generate_config.py")
104 | 
105 |             if not os.path.exists(generate_script_path):
106 |                 print(f"错误: 配置文件生成脚本 '{generate_script_path}' 未找到。程序退出。")
107 |                 sys.exit(1)
108 | 
109 |             print(f"正在调用脚本 '{os.path.basename(generate_script_path)}' 生成配置文件 '{config_path}'...")
110 |             # 使用 sys.executable 确保使用当前环境的 Python 解释器
111 |             # 将主题通过 stdin 传递给脚本
112 |             process = subprocess.run(
113 |                 [sys.executable, generate_script_path],
114 |                 input=user_theme,
115 |                 text=True,
116 |                 capture_output=True,
117 |                 check=False # 手动检查返回码
118 |             )
119 | 
120 |             # 打印脚本的输出 (stdout 和 stderr) 以便调试
121 |             print("\n--- 配置文件生成脚本输出 ---")
122 |             if process.stdout:
123 |                 print(process.stdout.strip())
124 |             if process.stderr:
125 |                 print(f"错误输出:\n{process.stderr.strip()}")
126 |             print("--- 脚本输出结束 ---\n")
127 | 
128 | 
129 |             if process.returncode != 0:
130 |                 print(f"自动生成配置文件失败。请检查上述错误信息。程序退出。")
131 |                 sys.exit(1)
132 |             elif not os.path.exists(config_path):
133 |                  print(f"脚本执行成功，但配置文件 '{config_path}' 仍然不存在。请检查脚本逻辑。程序退出。")
134 |                  sys.exit(1)
135 |             else:
136 |                 print(f"配置文件 '{config_path}' 已成功生成。")
137 |                 # 继续执行程序，将使用新生成的配置文件
138 | 
139 |         except Exception as e:
140 |             print(f"尝试生成配置文件时发生意外错误: {e}")
141 |             sys.exit(1)
142 | 
143 |     elif not os.path.exists(config_path):
144 |          print(f"错误: 指定的配置文件 '{config_path}' 未找到。程序退出。")
145 |          sys.exit(1)
146 | 
147 | 
148 |     # 后续代码保持不变，加载配置等
149 |     try:
150 |         # 加载配置 (现在确保 config_path 存在，无论是原有的还是新生成的)
151 |         config = Config(config_path)
152 |         
153 |         # 设置日志
154 |         setup_logging(config.log_config["log_dir"])
155 |         
156 |         # --- 获取小说标题并创建专属备份目录 ---
157 |         novel_title = config.novel_config.get("title")
158 |         if not novel_title:
159 |             logging.error("配置文件 'novel_config' 中缺少 'title' 键，无法创建专属输出目录。")
160 |             novel_title = "default_novel"
161 |             logging.warning(f"将使用默认小说标题: {novel_title}")
162 | 
163 |         safe_novel_title = novel_title
164 | 
165 |         base_output_dir = config.output_config.get("output_dir", "data/output")
166 |         novel_output_dir = os.path.join(base_output_dir, safe_novel_title)
167 |         os.makedirs(novel_output_dir, exist_ok=True)
168 | 
169 |         # 复制配置文件快照
170 |         config_snapshot_path = os.path.join(novel_output_dir, "config_snapshot.json")
171 |         try:
172 |             # 复制加载时使用的 config_path
173 |             shutil.copy2(config_path, config_snapshot_path)
174 |         except Exception as e:
175 |             logging.error(f"复制配置文件快照失败: {e}", exc_info=True)
176 |         
177 |         # 创建模型实例
178 |         from src.config.ai_config import AIConfig
179 |         ai_config = AIConfig()
180 |         
181 |         # 使用 Config 类中创建的模型配置，而不是重新实现
182 |         outline_model_config = config.get_model_config("outline_model")
183 |         content_model_config = config.get_model_config("content_model")
184 |         embedding_model_config = config.get_model_config("embedding_model")
185 |         
186 |         # 创建模型实例
187 |         outline_model = create_model(outline_model_config)
188 |         content_model = create_model(content_model_config)
189 |         embedding_model = create_model(embedding_model_config)
190 |         
191 |         # 创建知识库
192 |         knowledge_base = KnowledgeBase(
193 |             config.knowledge_base_config,
194 |             embedding_model
195 |         )
196 |         
197 |         # --- 实例化 Finalizer ---
198 |         # Instantiate Finalizer early as ContentGenerator might need it
199 |         finalizer = NovelFinalizer(config, content_model, knowledge_base)
200 |         
201 |         # 命令处理
202 |         if args.command == 'outline':
203 |             generator = OutlineGenerator(config, outline_model, knowledge_base, content_model)
204 |             
205 |             # 使用命令行参数或配置文件中的设置
206 |             novel_type = args.novel_type or config.novel_config.get("type")
207 |             theme = args.theme or config.novel_config.get("theme")
208 |             style = args.style or config.novel_config.get("style")
209 |             
210 |             success = generator.generate_outline(
211 |                 novel_type=novel_type,
212 |                 theme=theme,
213 |                 style=style,
214 |                 mode='replace',
215 |                 replace_range=(args.start, args.end),
216 |                 extra_prompt=args.extra_prompt
217 |             )
218 |             print("大纲生成成功！" if success else "大纲生成失败，请查看日志文件了解详细信息。")
219 |             
220 |         elif args.command == 'content':
221 |             # Pass finalizer instance to ContentGenerator
222 |             generator = ContentGenerator(config, content_model, knowledge_base, finalizer=finalizer)
223 |             
224 |             # 处理起始章节和目标章节逻辑
225 |             target_chapter_to_generate = None
226 |             if args.target_chapter is not None:
227 |                 target_chapter_to_generate = args.target_chapter
228 |             else:
229 |                 # 如果指定了起始章节，则设置当前章节索引
230 |                 if args.start_chapter is not None:
231 |                     # Validate start_chapter against loaded outline length if possible?
232 |                     # For now, trust the input or let ContentGenerator handle invalid index later.
233 |                     if args.start_chapter > 0 :
234 |                        generator.current_chapter = args.start_chapter - 1
235 |                        # Save the potentially updated starting point?
236 |                        # generator._save_progress() # Optional: save if you want '--start-chapter' to persist
237 |                     else:
238 |                        logging.warning(f"指定的起始章节 ({args.start_chapter}) 无效，将从上次进度开始。")
239 |             
240 |             # 调用内容生成方法 (removed update_sync_info)
241 |             success = generator.generate_content(
242 |                 target_chapter=target_chapter_to_generate,
243 |                 external_prompt=args.extra_prompt
244 |             )
245 |             print("内容生成成功！" if success else "内容生成失败，请查看日志文件了解详细信息。")
246 |             
247 |         elif args.command == 'finalize':
248 |             # Finalize command remains for manually finalizing a chapter if needed
249 |             # Finalizer is already instantiated
250 |             success = finalizer.finalize_chapter(args.chapter)
251 |             print("章节定稿处理成功！" if success else "章节定稿处理失败，请查看日志文件了解详细信息。")
252 |             
253 |         elif args.command == 'auto':
254 |             # 重新初始化日志系统，并清理旧日志
255 |             setup_logging(config.log_config["log_dir"], clear_logs=True)
256 |             # 自动流程需要实例化所有生成器
257 |             outline_generator = OutlineGenerator(config, outline_model, knowledge_base, content_model)
258 |             # Pass finalizer instance to ContentGenerator
259 |             content_generator = ContentGenerator(config, content_model, knowledge_base, finalizer=finalizer)
260 |             # finalizer is already instantiated
261 |             
262 |             # 从 summary.json 获取当前章节进度
263 |             summary_file = os.path.join(base_output_dir, "summary.json")
264 |             start_chapter_index = 0  # Default to 0 (start from chapter 1)
265 |             if os.path.exists(summary_file):
266 |                 try:
267 |                     with open(summary_file, 'r', encoding='utf-8') as f:
268 |                         summary_data = json.load(f)
269 |                         # 获取最大的章节号作为当前进度
270 |                         chapter_numbers = [int(k) for k in summary_data.keys() if k.isdigit()]
271 |                         start_chapter_index = max(chapter_numbers) if chapter_numbers else 0
272 |                 except (json.JSONDecodeError, ValueError, TypeError) as e:
273 |                     logging.warning(f"读取或解析摘要文件 {summary_file} 失败: {e}. 将从头开始。")
274 |                     start_chapter_index = 0  # Reset on error
275 |             
276 |             content_generator.current_chapter = start_chapter_index # Set generator's start point
277 |             actual_start_chapter_num = start_chapter_index + 1
278 |             
279 |             # 从config.json获取目标章节数
280 |             end_chapter = config.novel_config.get("target_chapters")
281 |             if not end_chapter or not isinstance(end_chapter, int) or end_chapter <= 0:
282 |                 logging.error("配置文件中未找到有效的目标章节数设置 (target_chapters)")
283 |                 return
284 |             
285 |             # 1. 检查并生成大纲
286 |             outline_generator._load_outline() # Ensure outline is loaded
287 |             current_outline_count = len(outline_generator.chapter_outlines)
288 | 
289 |             # 添加强制重新生成选项检查
290 |             force_regenerate = getattr(args, 'force_outline', False)
291 |             
292 |             if current_outline_count < end_chapter or force_regenerate:
293 |                 if force_regenerate:
294 |                     outline_success = outline_generator.generate_outline(
295 |                         novel_type=config.novel_config.get("type"),
296 |                         theme=config.novel_config.get("theme"),
297 |                         style=config.novel_config.get("style"),
298 |                         mode='replace',
299 |                         replace_range=(1, end_chapter),  # 重新生成全部
300 |                         extra_prompt=args.extra_prompt
301 |                     )
302 |                 else:
303 |                     outline_success = outline_generator.generate_outline(
304 |                         novel_type=config.novel_config.get("type"),
305 |                         theme=config.novel_config.get("theme"),
306 |                         style=config.novel_config.get("style"),
307 |                         mode='replace',
308 |                         replace_range=(current_outline_count + 1, end_chapter),
309 |                         extra_prompt=args.extra_prompt
310 |                     )
311 |                 
312 |                 if not outline_success:
313 |                     print("大纲生成失败，停止流程。")
314 |                     return
315 |                 print("大纲生成成功！")
316 |                 # Reload outline in content_generator after modification
317 |                 # content_generator._load_outline() # Moved outside
318 | 
319 |             # Ensure content_generator always loads the outline before proceeding
320 |             content_generator._load_outline()
321 | 
322 |             # Check if start chapter is already beyond target
323 |             if actual_start_chapter_num > end_chapter:
324 |                  content_success = True # Nothing to do, considered success
325 |             else:
326 |                  # 2. 生成内容 (ContentGenerator now handles finalization internally)
327 |                  # The generate_content call will handle chapters from generator.current_chapter up to the end of the outline
328 |                  # We rely on the loaded outline length to determine the end point.
329 |                  # We need to ensure the outline actually covers up to end_chapter
330 |                  # Now that content_generator._load_outline() is guaranteed to run, this check should be correct
331 |                  if len(content_generator.chapter_outlines) < end_chapter:
332 |                       logging.error(f"错误：大纲加载后章节数 ({len(content_generator.chapter_outlines)}) 仍小于目标章节数 ({end_chapter})。")
333 |                       return
334 | 
335 |                  # Call generate_content without target_chapter to process remaining chapters from current_chapter
336 |                  content_success = content_generator.generate_content(
337 |                       external_prompt=args.extra_prompt
338 |                       # Removed update_sync_info
339 |                  )
340 | 
341 |                  if not content_success:
342 |                      print("内容生成或定稿过程中失败，停止流程。")
343 |                      return
344 |                  print("内容生成及定稿成功！")
345 | 
346 |             print("自动生成流程全部完成！")
347 | 
348 |         elif args.command == 'imitate':
349 |             try:
350 |                 # 1. 读取输入文件
351 |                 with open(args.style_source, 'r', encoding='utf-8') as f:
352 |                     style_text = f.read()
353 | 
354 |                 with open(args.input_file, 'r', encoding='utf-8') as f:
355 |                     input_text = f.read()
356 | 
357 |                 # 2. 初始化模型（使用内容生成模型进行仿写）
358 |                 imitation_model = content_model  # 复用已创建的内容生成模型
359 | 
360 |                 # 3. 创建一个临时的、基于风格范文的知识库
361 |                 # 创建一个临时的知识库配置，指向一个专用的仿写缓存目录
362 |                 imitate_kb_config = config.knowledge_base_config.copy()
363 |                 imitate_kb_config["cache_dir"] = os.path.join(config.knowledge_base_config["cache_dir"], "imitation_cache")
364 |                 style_kb = KnowledgeBase(imitate_kb_config, embedding_model)
365 |                 style_kb.build(style_text, force_rebuild=False)
366 | 
367 |                 # 4. 从风格知识库中检索与原始文本最相关的片段作为范例
368 |                 style_examples = style_kb.search(input_text, k=5)
369 | 
370 |                 # 5. 导入并使用新的仿写提示词
371 |                 from src.generators.prompts import get_imitation_prompt
372 |                 prompt = get_imitation_prompt(
373 |                     original_text=input_text,
374 |                     style_examples=style_examples,
375 |                     extra_prompt=args.extra_prompt
376 |                 )
377 | 
378 |                 # 6. 调用模型生成仿写内容
379 |                 imitated_content = imitation_model.generate(prompt)
380 | 
381 |                 # 7. 保存结果
382 |                 with open(args.output_file, 'w', encoding='utf-8') as f:
383 |                     f.write(imitated_content)
384 |                 print(f"仿写成功！结果已保存至 {args.output_file}")
385 | 
386 |             except FileNotFoundError as e:
387 |                 logging.error(f"文件未找到: {e}", exc_info=True)
388 |                 print(f"错误：文件未找到 - {e}")
389 |             except Exception as e:
390 |                 logging.error(f"执行仿写任务时出错: {e}", exc_info=True)
391 |                 print(f"错误：执行仿写任务失败，请查看日志。")
392 |             
393 |         else:
394 |             parser.print_help()
395 |             
396 |     except FileNotFoundError as e:
397 |         logging.error(f"文件未找到错误: {str(e)}。请检查配置文件路径和配置文件中引用的路径是否正确。", exc_info=True)
398 |     except KeyError as e:
399 |         logging.error(f"配置项缺失错误: 键 '{str(e)}' 在配置文件中未找到。请检查 config.json 文件。", exc_info=True)
400 |     except Exception as e:
401 |         logging.error(f"程序执行出错: {str(e)}", exc_info=True)
402 | 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # AI模型依赖
 2 | openai>=1.0.0
 3 | google-generativeai>=0.3.0
 4 | 
 5 | # 向量数据库和嵌入
 6 | chromadb>=0.4.0
 7 | faiss-cpu>=1.7.4
 8 | FlagEmbedding>=1.2.7
 9 | 
10 | # 中文处理
11 | jieba>=0.42.1
12 | opencc
13 | 
14 | # 核心依赖
15 | python-dotenv>=1.0.0
16 | tenacity>=8.2.0
17 | numpy>=1.24.0
18 | pydantic>=2.0.0
19 | beautifulsoup4
20 | 
21 | # GUI框架
22 | PySide6>=6.5.0
23 | 
24 | # 开发和测试
25 | pytest>=7.4.0
26 | 


--------------------------------------------------------------------------------
/src/config/ai_config.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict, Any
  3 | from dotenv import load_dotenv
  4 | 
  5 | class AIConfig:
  6 |     """AI模型配置管理类"""
  7 |     
  8 |     def __init__(self):
  9 |         # 加载环境变量
 10 |         load_dotenv()
 11 | 
 12 |         # OpenAI 配置（提前定义）
 13 |         self.openai_config = {
 14 |             "retry_delay": float(os.getenv("OPENAI_RETRY_DELAY", "10")),  # 默认 10 秒
 15 |             "models": {
 16 |                 "embedding": {
 17 |                     "name": "Qwen/Qwen3-Embedding-0.6B",
 18 |                     "temperature": 0.7,
 19 |                     "dimension": 1024,
 20 |                     "api_key": os.getenv("OPENAI_EMBEDDING_API_KEY", ""),
 21 |                     "base_url": os.getenv("OPENAI_EMBEDDING_API_BASE", "https://api.siliconflow.cn/v1"),
 22 |                     "timeout": int(os.getenv("OPENAI_EMBEDDING_TIMEOUT", "60"))
 23 |                 },
 24 |                 "outline": {
 25 |                     "name": "deepgeminipro",  # 使用本地服务器支持的模型
 26 |                     "temperature": 1.0,
 27 |                     "api_key": os.getenv("OPENAI_OUTLINE_API_KEY", ""),
 28 |                     "base_url": os.getenv("OPENAI_OUTLINE_API_BASE", "https://api.siliconflow.cn/v1"),
 29 |                     "timeout": int(os.getenv("OPENAI_OUTLINE_TIMEOUT", "120"))
 30 |                 },
 31 |                 "content": {
 32 |                     "name": "deepgeminiflash",  # 使用deepclaude接口的模型
 33 |                     "temperature": 0.7,
 34 |                     "api_key": os.getenv("OPENAI_CONTENT_API_KEY", ""),
 35 |                     "base_url": os.getenv("OPENAI_CONTENT_API_BASE", "https://api.siliconflow.cn/v1"),
 36 |                     "timeout": int(os.getenv("OPENAI_CONTENT_TIMEOUT", "180"))  # 内容生成需要更长时间
 37 |                 },
 38 |                 "reranker": {
 39 |                     "name": os.getenv("OPENAI_RERANKER_MODEL", "Qwen/Qwen3-Reranker-0.6B"),
 40 |                     "api_key": os.getenv("OPENAI_EMBEDDING_API_KEY", ""),
 41 |                     "base_url": os.getenv("OPENAI_EMBEDDING_API_BASE", "https://api.siliconflow.cn/v1"),
 42 |                     "use_fp16": os.getenv("OPENAI_RERANKER_USE_FP16", "True") == "True",
 43 |                     "timeout": int(os.getenv("OPENAI_EMBEDDING_TIMEOUT", "60"))
 44 |                 }
 45 |             }
 46 |         }
 47 |         # Gemini 配置
 48 |         self.gemini_config = {
 49 |             "api_key": os.getenv("GEMINI_API_KEY", ""),
 50 |             "retry_delay": float(os.getenv("GEMINI_RETRY_DELAY", "30")),  # 默认 30 秒
 51 |             "max_retries": int(os.getenv("GEMINI_MAX_RETRIES", "5")),  # 默认 5 次
 52 |             "max_input_length": int(os.getenv("GEMINI_MAX_INPUT_LENGTH", "500000")),  # 默认 500000 字符
 53 |             "timeout": int(os.getenv("GEMINI_TIMEOUT", "60")),  # 默认 60 秒
 54 |             # 备用模型配置
 55 |             "fallback": {
 56 |                 "enabled": os.getenv("GEMINI_FALLBACK_ENABLED", "True") == "True",  # 默认启用备用模型
 57 |                 "api_key": os.getenv("OPENAI_EMBEDDING_API_KEY", ""),  # 使用embedding的API key作为备用
 58 |                 "base_url": os.getenv("GEMINI_FALLBACK_BASE_URL", "https://api.siliconflow.cn/v1"),
 59 |                 "timeout": int(os.getenv("GEMINI_FALLBACK_TIMEOUT", "120")),  # 备用API使用更长的超时时间
 60 |                 "models": {
 61 |                     "flash": "moonshotai/Kimi-K2-Instruct",  # flash模型的备用
 62 |                     "pro": "Qwen/Qwen3-235B-A22B-Thinking-2507",  # pro模型的备用
 63 |                     "default": "deepseek-ai/DeepSeek-V3.1"  # 默认备用模型
 64 |                 }
 65 |             },
 66 |             "models": {
 67 |                 "outline": {
 68 |                     "name": "gemini-2.5-pro",
 69 |                     "temperature": 1.0
 70 |                 },
 71 |                 "content": {
 72 |                     "name": "gemini-2.5-flash",
 73 |                     "temperature": 0.7
 74 |                 }
 75 |             }
 76 |         }
 77 |         # 火山引擎DeepSeek-V3.1配置
 78 |         self.volcengine_config = {
 79 |             "api_key": os.getenv("VOLCENGINE_API_KEY", ""),
 80 |             "api_endpoint": os.getenv("VOLCENGINE_API_ENDPOINT", 
 81 |                                      "https://ark.cn-beijing.volces.com/api/v3"),
 82 |             "thinking_enabled": os.getenv("VOLCENGINE_THINKING_ENABLED", "true").lower() == "true",
 83 |             "timeout": int(os.getenv("VOLCENGINE_TIMEOUT", "120")),
 84 |             "max_tokens": int(os.getenv("VOLCENGINE_MAX_TOKENS", "8192")),
 85 |             "retry_delay": float(os.getenv("VOLCENGINE_RETRY_DELAY", "15")),
 86 |             "max_retries": int(os.getenv("VOLCENGINE_MAX_RETRIES", "3")),
 87 |             # 模型配置
 88 |             "models": {
 89 |                 "outline": {
 90 |                     "name": os.getenv("VOLCENGINE_OUTLINE_MODEL_ID", "deepseek-v3-1-250821"),
 91 |                     "temperature": float(os.getenv("VOLCENGINE_OUTLINE_TEMPERATURE", "1.0"))
 92 |                 },
 93 |                 "content": {
 94 |                     "name": os.getenv("VOLCENGINE_CONTENT_MODEL_ID", "deepseek-v3-1-250821"),
 95 |                     "temperature": float(os.getenv("VOLCENGINE_CONTENT_TEMPERATURE", "0.7"))
 96 |                 }
 97 |             },
 98 |             # 备用模型配置
 99 |             "fallback": {
100 |                 "enabled": os.getenv("VOLCENGINE_FALLBACK_ENABLED", "true").lower() == "true",
101 |                 "provider": "openai",  # 备用到OpenAI兼容模型
102 |                 "model_name": "deepseek-ai/DeepSeek-V3.1",
103 |                 "api_key": os.getenv("OPENAI_EMBEDDING_API_KEY", ""),  # 使用嵌入模型的API密钥作为备用
104 |                 "base_url": "https://api.siliconflow.cn/v1"  # 使用硅基流动的API地址
105 |             }
106 |         }
107 |         # 验证配置
108 |         self._validate_config()
109 |     
110 |     def _validate_config(self):
111 |         """验证配置是否有效"""
112 |         # 验证 Gemini 配置
113 |         if not self.gemini_config["api_key"]:
114 |             raise ValueError("未设置 GEMINI_API_KEY 环境变量")
115 |             
116 |         # 验证 OpenAI 配置
117 |         for model_type, model_config in self.openai_config["models"].items():
118 |             if not model_config["api_key"]:
119 |                 raise ValueError(f"未设置 OPENAI_{model_type.upper()}_API_KEY 环境变量")
120 |             if not model_config["base_url"]:
121 |                 raise ValueError(f"未设置 OPENAI_{model_type.upper()}_API_BASE 环境变量")
122 |                 
123 |         # 验证火山引擎配置（仅在api_key存在时验证）
124 |         if self.volcengine_config["api_key"]:
125 |             if not self.volcengine_config["api_endpoint"]:
126 |                 raise ValueError("火山引擎API Key已设置但缺少API端点配置")
127 |             # 验证模型配置
128 |             for model_type in ["outline", "content"]:
129 |                 if not self.volcengine_config["models"][model_type]["name"]:
130 |                     raise ValueError(f"火山引擎{model_type}模型配置缺少模型ID")
131 |     
132 |     def get_gemini_config(self, model_type: str = "content") -> Dict[str, Any]:
133 |         """获取 Gemini 模型配置"""
134 |         if model_type not in self.gemini_config["models"]:
135 |             raise ValueError(f"不支持的 Gemini 模型类型: {model_type}")
136 |             
137 |         config = {
138 |             "type": "gemini",
139 |             "api_key": self.gemini_config["api_key"],
140 |             "model_name": self.gemini_config["models"][model_type]["name"],
141 |             "temperature": self.gemini_config["models"][model_type]["temperature"],
142 |             "retry_delay": self.gemini_config["retry_delay"],
143 |             "max_retries": self.gemini_config["max_retries"],
144 |             "max_input_length": self.gemini_config["max_input_length"],
145 |             "timeout": self.gemini_config["timeout"]
146 |         }
147 |         
148 |         # 添加备用模型配置
149 |         if self.gemini_config["fallback"]["enabled"]:
150 |             config.update({
151 |                 "fallback_enabled": True,
152 |                 "fallback_api_key": self.gemini_config["fallback"]["api_key"],
153 |                 "fallback_base_url": self.gemini_config["fallback"]["base_url"],
154 |                 "fallback_timeout": self.gemini_config["fallback"]["timeout"],
155 |                 "fallback_models": self.gemini_config["fallback"]["models"]
156 |             })
157 |         else:
158 |             config["fallback_enabled"] = False
159 |             
160 |         return config
161 |     
162 |     def get_volcengine_config(self, model_type: str = "content") -> Dict[str, Any]:
163 |         """获取火山引擎模型配置"""
164 |         if not self.volcengine_config["api_key"]:
165 |             raise ValueError("未设置 VOLCENGINE_API_KEY 环境变量")
166 |             
167 |         if model_type not in self.volcengine_config["models"]:
168 |             raise ValueError(f"不支持的火山引擎模型类型: {model_type}")
169 |             
170 |         model_config = self.volcengine_config["models"][model_type]
171 |         
172 |         config = {
173 |             "type": "volcengine",
174 |             "api_key": self.volcengine_config["api_key"],
175 |             "base_url": self.volcengine_config["api_endpoint"],
176 |             "model_name": model_config["name"],
177 |             "thinking_enabled": self.volcengine_config["thinking_enabled"],
178 |             "temperature": model_config["temperature"],
179 |             "max_tokens": self.volcengine_config["max_tokens"],
180 |             "timeout": self.volcengine_config["timeout"],
181 |             "retry_delay": self.volcengine_config["retry_delay"],
182 |             "max_retries": self.volcengine_config["max_retries"]
183 |         }
184 |         
185 |         # 添加备用模型配置
186 |         if self.volcengine_config["fallback"]["enabled"]:
187 |             config.update({
188 |                 "fallback_enabled": True,
189 |                 "fallback_api_key": self.volcengine_config["fallback"]["api_key"],
190 |                 "fallback_base_url": self.volcengine_config["fallback"]["base_url"],
191 |                 "fallback_model_name": self.volcengine_config["fallback"]["model_name"]
192 |             })
193 |         else:
194 |             config["fallback_enabled"] = False
195 |             
196 |         return config
197 |     
198 |     def get_openai_config(self, model_type: str = "embedding") -> Dict[str, Any]:
199 |         """获取 OpenAI 模型配置"""
200 |         if model_type not in self.openai_config["models"]:
201 |             raise ValueError(f"不支持的 OpenAI 模型类型: {model_type}")
202 |         model_config = self.openai_config["models"][model_type]
203 |         # 针对reranker类型，返回专用字段
204 |         if model_type == "reranker":
205 |             return {
206 |                 "type": "openai",
207 |                 "api_key": model_config["api_key"],
208 |                 "base_url": model_config["base_url"],
209 |                 "model_name": model_config["name"],
210 |                 "use_fp16": model_config.get("use_fp16", True),
211 |                 "retry_delay": self.openai_config["retry_delay"],
212 |                 "timeout": model_config.get("timeout", 60)
213 |             }
214 |         return {
215 |             "type": "openai",
216 |             "api_key": model_config["api_key"],
217 |             "base_url": model_config["base_url"],
218 |             "model_name": model_config["name"],
219 |             "temperature": model_config["temperature"],
220 |             "dimension": model_config.get("dimension", 1024),
221 |             "retry_delay": self.openai_config["retry_delay"],
222 |             "timeout": model_config.get("timeout", 60)
223 |         }
224 |     
225 |     def get_model_config(self, model_type: str) -> Dict[str, Any]:
226 |         """获取指定类型的模型配置"""
227 |         if model_type.startswith("gemini"):
228 |             return self.get_gemini_config(model_type.split("_")[1])
229 |         elif model_type.startswith("openai"):
230 |             return self.get_openai_config(model_type.split("_")[1])
231 |         else:
232 |             raise ValueError(f"不支持的模型类型: {model_type}")
233 | 
234 |     def get_model_config_by_purpose(self, model_purpose: str) -> Dict[str, Any]:
235 |         """根据用途获取模型配置"""
236 |         # 这个方法需要外部传入config和ai_config实例
237 |         # 暂时保留但标记为需要重构
238 |         raise NotImplementedError("此方法需要重构，请使用get_gemini_config或get_openai_config方法")


--------------------------------------------------------------------------------
/src/config/config.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, Any
  2 | import os
  3 | import json
  4 | import logging
  5 | from dotenv import load_dotenv
  6 | from .ai_config import AIConfig
  7 | 
  8 | def _sanitize_config_for_logging(config: Dict[str, Any]) -> Dict[str, Any]:
  9 |     """
 10 |     清理配置对象中的敏感信息，用于安全的日志输出
 11 |     
 12 |     Args:
 13 |         config: 原始配置字典
 14 |         
 15 |     Returns:
 16 |         清理后的配置字典，敏感信息已被替换为星号
 17 |     """
 18 |     if not isinstance(config, dict):
 19 |         return config
 20 |         
 21 |     sanitized = {}
 22 |     sensitive_keys = {'api_key', 'fallback_api_key', 'password', 'secret', 'token'}
 23 |     
 24 |     for key, value in config.items():
 25 |         if isinstance(value, dict):
 26 |             sanitized[key] = _sanitize_config_for_logging(value)
 27 |         elif any(sensitive_key in key.lower() for sensitive_key in sensitive_keys):
 28 |             # 如果值不为空，则显示前4位和后4位，中间用星号替代
 29 |             if value and len(str(value)) > 8:
 30 |                 sanitized[key] = f"{str(value)[:4]}****{str(value)[-4:]}"
 31 |             elif value:
 32 |                 sanitized[key] = "****"
 33 |             else:
 34 |                 sanitized[key] = "未设置"
 35 |         else:
 36 |             sanitized[key] = value
 37 |     
 38 |     return sanitized
 39 | 
 40 | class Config:
 41 |     """配置管理类"""
 42 |     
 43 |     def __init__(self, config_file: str = "config.json"):
 44 |         """
 45 |         初始化配置
 46 |         
 47 |         Args:
 48 |             config_file: 配置文件路径
 49 |         """
 50 |         self.base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 51 |         # 如果配置文件路径不是绝对路径，则相对于项目根目录
 52 |         if not os.path.isabs(config_file):
 53 |             self.config_file = os.path.join(self.base_dir, config_file)
 54 |         else:
 55 |             self.config_file = config_file
 56 |         
 57 |         # 加载环境变量
 58 |         load_dotenv()
 59 |         
 60 |         # 加载配置文件
 61 |         with open(self.config_file, 'r', encoding='utf-8') as f:
 62 |             self.config = json.load(f)
 63 |             
 64 |         # 初始化 AI 配置
 65 |         self.ai_config = AIConfig()
 66 |         
 67 |         # 从配置文件中读取 output_dir
 68 |         config_output_dir = self.config["output_config"].get("output_dir")
 69 |         
 70 |         # 优先使用config.json中的model_config，如果没有则使用AIConfig的默认配置
 71 |         if "model_config" in self.config:
 72 |             # 使用配置文件中的model_config
 73 |             self.model_config = self.config["model_config"].copy()
 74 |             logging.info("使用配置文件中的model_config")
 75 |         else:
 76 |             # 动态AI模型配置，根据config.json的model_selection字段
 77 |             self.model_config = {}
 78 |             model_selection = self.config["generation_config"].get("model_selection", {})
 79 |             # outline_model
 80 |             outline_sel = model_selection.get("outline", {"provider": "gemini", "model_type": "outline"})
 81 |             if outline_sel["provider"] == "volcengine":
 82 |                 self.model_config["outline_model"] = self.ai_config.get_volcengine_config(outline_sel["model_type"])
 83 |             elif outline_sel["provider"] == "openai":
 84 |                 self.model_config["outline_model"] = self.ai_config.get_openai_config(outline_sel["model_type"])
 85 |             else:
 86 |                 self.model_config["outline_model"] = self.ai_config.get_gemini_config(outline_sel["model_type"])
 87 |             # content_model
 88 |             content_sel = model_selection.get("content", {"provider": "gemini", "model_type": "content"})
 89 |             if content_sel["provider"] == "volcengine":
 90 |                 self.model_config["content_model"] = self.ai_config.get_volcengine_config(content_sel["model_type"])
 91 |             elif content_sel["provider"] == "openai":
 92 |                 self.model_config["content_model"] = self.ai_config.get_openai_config(content_sel["model_type"])
 93 |             else:
 94 |                 self.model_config["content_model"] = self.ai_config.get_gemini_config(content_sel["model_type"])
 95 |             # embedding_model 只支持openai
 96 |             self.model_config["embedding_model"] = self.ai_config.get_openai_config("embedding")
 97 |             logging.info("使用AIConfig的默认model_config")
 98 |         
 99 |         # 小说配置
100 |         self.novel_config = self.config["novel_config"]
101 |         
102 |         # 知识库配置
103 |         self.knowledge_base_config = self.config["knowledge_base_config"]
104 |         self.knowledge_base_config["reference_files"] = [
105 |             os.path.join(self.base_dir, file_path)
106 |             for file_path in self.knowledge_base_config["reference_files"]
107 |         ]
108 |         
109 |         # 生成器配置
110 |         self.generator_config = {
111 |             "target_chapters": self.novel_config["target_chapters"],
112 |             "chapter_length": self.novel_config["chapter_length"],
113 |             "output_dir": config_output_dir if config_output_dir else os.path.join(self.base_dir, "data", "output"),
114 |             "max_retries": self.config["generation_config"]["max_retries"],
115 |             "retry_delay": self.config["generation_config"]["retry_delay"],
116 |             "validation": self.config["generation_config"]["validation"]
117 |         }
118 |         
119 |         # 日志配置
120 |         self.log_config = {
121 |             "log_dir": os.path.join(self.base_dir, "data", "logs"),
122 |             "log_level": "INFO",
123 |             "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
124 |         }
125 |         
126 |         # 输出配置
127 |         self.output_config = self.config["output_config"]
128 |         self.output_config.update({
129 |             "output_dir": config_output_dir if config_output_dir else os.path.join(self.base_dir, "data", "output")
130 |         })
131 |         
132 |         # 仿写配置
133 |         self.imitation_config = self.config.get("imitation_config", {})
134 | 
135 |         # 启动时打印当前 model_config 便于调试（安全输出）
136 |         logging.info(f"[调试] 当前 model_config: {_sanitize_config_for_logging(self.model_config)}")
137 |     
138 |     def get_model_config(self, model_type: str) -> Dict[str, Any]:
139 |         """
140 |         获取指定类型的模型配置
141 |         
142 |         Args:
143 |             model_type: 模型类型（outline_model/content_model/embedding_model/imitation_model）
144 |             
145 |         Returns:
146 |             Dict[str, Any]: 模型配置
147 |         """
148 |         if model_type in self.model_config:
149 |             return self.model_config[model_type]
150 |         raise ValueError(f"不支持的模型类型: {model_type}")
151 |     
152 |     def get_writing_guide(self) -> Dict:
153 |         """获取写作指南"""
154 |         return self.novel_config["writing_guide"]
155 |         
156 |     def save(self):
157 |         """保存配置到文件"""
158 |         config = {
159 |             "novel_config": self.novel_config,
160 |             "generation_config": {
161 |                 "max_retries": self.generator_config["max_retries"],
162 |                 "retry_delay": self.generator_config["retry_delay"],
163 |                 "validation": self.generator_config["validation"]
164 |             },
165 |             "output_config": self.output_config
166 |         }
167 |         
168 |         with open(self.config_file, 'w', encoding='utf-8') as f:
169 |             json.dump(config, f, ensure_ascii=False, indent=2)
170 |     
171 |     def __getattr__(self, name: str) -> Any:
172 |         """获取配置项"""
173 |         if name in self.config:
174 |             return self.config[name]
175 |         raise AttributeError(f"Config has no attribute '{name}'")
176 | 
177 |     def get_imitation_model(self) -> Dict[str, Any]:
178 |         """
179 |         获取仿写专用模型配置，优先级：
180 |         1. model_config['imitation_model']
181 |         2. content_model（默认使用当前内容生成模型）
182 |         3. ai_config.gemini_config['fallback']（作为最后备用选项）
183 |         """
184 |         # 1. 优先使用 model_config['imitation_model']
185 |         if "imitation_model" in self.model_config:
186 |             logging.info(f"[仿写模型选择] 使用 model_config['imitation_model']: {_sanitize_config_for_logging(self.model_config['imitation_model'])}")
187 |             return self.model_config["imitation_model"]
188 |         # 2. 默认使用 content_model（推荐）
189 |         content_model = self.model_config.get("content_model")
190 |         if content_model:
191 |             logging.info(f"[仿写模型选择] 使用 content_model: {_sanitize_config_for_logging(content_model)}")
192 |             return content_model
193 |         # 3. 最后使用 ai_config.gemini_config['fallback'] 作为备用
194 |         fallback = getattr(self.ai_config, "gemini_config", {}).get("fallback")
195 |         if fallback and fallback.get("enabled", False):
196 |             fallback_model_name = fallback.get("models", {}).get("default", "deepseek-ai/DeepSeek-V3.1")
197 |             imitation_fallback_config = {
198 |                 "type": "gemini",
199 |                 "model_name": fallback_model_name,
200 |                 "api_key": fallback.get("api_key", ""),
201 |                 "base_url": fallback.get("base_url", "https://api.siliconflow.cn/v1"),
202 |                 "timeout": fallback.get("timeout", 180),
203 |             }
204 |             logging.info(f"[仿写模型选择] 使用 gemini_config['fallback'] 作为最后备用: {_sanitize_config_for_logging(imitation_fallback_config)}")
205 |             return imitation_fallback_config
206 |         # 4. 如果所有配置都不可用，抛出异常
207 |         raise ValueError("无法获取仿写模型配置：未配置 imitation_model、content_model 或 fallback 模型") 


--------------------------------------------------------------------------------
/src/generators/common/data_structures.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import Dict, List
 3 | 
@dataclass
class ChapterOutline:
    """Outline data structure for a single chapter.

    All fields are required positional dataclass fields (no defaults).
    """
    chapter_number: int
    title: str
    key_points: List[str]
    characters: List[str]  # presumably character names appearing in the chapter -- confirm against callers
    settings: List[str]
    conflicts: List[str]
13 | 
@dataclass
class NovelOutline:
    """Outline data structure for a whole novel: a title plus its chapter outlines."""
    title: str
    chapters: List[ChapterOutline]
19 | 
@dataclass
class Character:
    """Character data structure.

    The first six fields are required; every remaining field has a default,
    and list-valued defaults use ``default_factory`` so instances never share
    the same list object.
    """
    name: str
    role: str  # protagonist, supporting character, antagonist, etc.
    personality: Dict[str, float]  # weights of personality traits
    goals: List[str]
    relationships: Dict[str, str]
    development_stage: str  # current development stage
    alignment: str = "中立"  # faction: righteous, villain, neutral, etc.; defaults to neutral
    realm: str = "凡人"      # cultivation realm, e.g. mortal, Qi Refining, Foundation Establishment, Golden Core; defaults to mortal
    level: int = 1          # level, defaults to 1
    cultivation_method: str = "无" # cultivation method, defaults to none
    magic_treasure: List[str] = field(default_factory=list) # list of magic treasures, defaults to an empty list
    temperament: str = "平和"    # temperament, defaults to calm
    ability: List[str] = field(default_factory=list)      # list of abilities, defaults to an empty list
    stamina: int = 100        # stamina value, defaults to 100
    sect: str = "无门无派"      # sect, defaults to "no sect"
    position: str = "普通弟子"    # position within the sect, defaults to ordinary disciple
    emotions_history: List[str] = field(default_factory=list)  # history of recorded emotions
    states_history: List[str] = field(default_factory=list)    # history of recorded states
    descriptions_history: List[str] = field(default_factory=list)  # history of recorded descriptions


--------------------------------------------------------------------------------
/src/generators/common/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import logging
 4 | import sys # 引入 sys 模块以访问 stdout
 5 | from logging.handlers import RotatingFileHandler # 推荐使用 RotatingFileHandler 以防日志文件过大
 6 | from typing import Dict, List, Optional, Any
 7 | from opencc import OpenCC
 8 | 
 9 | def setup_logging(log_dir: str, clear_logs: bool = False):
10 |     """设置日志系统"""
11 |     root_logger = logging.getLogger()
12 |     
13 |     # 清理所有现有的处理器，避免重复
14 |     for handler in root_logger.handlers[:]:
15 |         root_logger.removeHandler(handler)
16 |         handler.close()
17 | 
18 |     # 清理旧的日志文件
19 |     log_file = os.path.join(log_dir, "generation.log")
20 |     if clear_logs and os.path.exists(log_file):
21 |         try:
22 |             os.remove(log_file)
23 |         except Exception as e:
24 |             print(f"清理日志文件失败: {e}")
25 | 
26 |     # 配置根日志记录器
27 |     root_logger.setLevel(logging.INFO)
28 |     formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
29 | 
30 |     # 添加文件处理器
31 |     file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
32 |     file_handler.setFormatter(formatter)
33 |     root_logger.addHandler(file_handler)
34 | 
35 |     # 添加控制台处理器
36 |     console_handler = logging.StreamHandler()
37 |     console_handler.setFormatter(formatter)
38 |     root_logger.addHandler(console_handler)
39 | 
40 |     logging.info("日志系统初始化完成，将输出到文件和终端。")
41 | 
def load_json_file(file_path: str, default_value: Any = None) -> Any:
    """Read and parse a JSON file.

    Args:
        file_path: Path of the JSON file to read.
        default_value: Value returned when the file does not exist or
            reading/parsing it fails.

    Returns:
        The parsed JSON content, or ``default_value``. Failures are logged
        rather than raised.
    """
    loaded = default_value
    try:
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as handle:
                loaded = json.load(handle)
    except Exception as err:
        logging.error(f"加载JSON文件 {file_path} 时出错: {str(err)}")
    return loaded
51 | 
def save_json_file(file_path: str, data: Any) -> bool:
    """Serialize ``data`` as JSON and write it to ``file_path``.

    Missing parent directories are created. The file is written as UTF-8
    with non-ASCII characters kept as-is and a two-space indent.

    Args:
        file_path: Destination path. A bare filename (no directory part) is
            written relative to the current working directory.
        data: Any JSON-serializable object.

    Returns:
        True on success, False when creating the directory, opening the
        file or serializing fails (the error is logged with a traceback).
    """
    try:
        # os.makedirs("") raises, so only create a directory when the path
        # actually has a directory component.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        logging.info(f"成功保存JSON文件: {file_path}")
        return True
    except Exception as e:
        # exc_info=True records the full stack trace for diagnosis.
        logging.error(f"保存JSON文件 {file_path} 时出错: {str(e)}", exc_info=True)
        return False
64 | 
def clean_text(text: str) -> str:
    """Strip surrounding whitespace and run the text through an OpenCC 't2s' converter.

    Args:
        text: Raw text to clean.

    Returns:
        The stripped text after 't2s' (Traditional-to-Simplified) conversion.
    """
    converter = OpenCC('t2s')
    trimmed = text.strip()
    return converter.convert(trimmed)
71 | 
def validate_directory(directory: str) -> bool:
    """Ensure that ``directory`` exists, creating it (and parents) if needed.

    Args:
        directory: Path of the directory to check or create.

    Returns:
        True when the directory exists after the call, False when creating
        it failed (the error is logged).
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except Exception as err:
        logging.error(f"创建目录 {directory} 时出错: {str(err)}")
        return False
    return True


--------------------------------------------------------------------------------
/src/generators/content/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenjiazhu/OCNovel/4e2ece92f8c80fc9880cfa44cfb32169570f2ec9/src/generators/content/__init__.py


--------------------------------------------------------------------------------
/src/generators/content/consistency_checker.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 一致性检查模块 - 负责检查和修正章节内容的一致性
  3 | 
  4 | 此模块提供了两个主要功能：
  5 | 1. 检查章节内容的一致性，包括主题、情节、角色、世界观和逻辑等五个维度
  6 | 2. 根据一致性检查报告修正章节内容
  7 | """
  8 | 
  9 | import os
 10 | import json
 11 | import logging
 12 | import re
 13 | import dataclasses
 14 | from typing import Dict, Tuple, Any, List, Optional
 15 | 
 16 | # 导入提示词模块
 17 | from .. import prompts
 18 | 
 19 | class ConsistencyChecker:
 20 |     """小说章节内容一致性检查器类"""
 21 |     
 22 |     def __init__(self, content_model, output_dir: str):
 23 |         """
 24 |         初始化一致性检查器
 25 |         
 26 |         Args:
 27 |             content_model: 用于生成内容的模型
 28 |             output_dir: 输出目录路径
 29 |         """
 30 |         self.content_model = content_model
 31 |         self.output_dir = output_dir
 32 |         self.min_acceptable_score = 75  # 最低可接受分数
 33 |         self.max_revision_attempts = 3  # 最大修正尝试次数
 34 |     
 35 |     def check_chapter_consistency(
 36 |         self,
 37 |         chapter_content: str,
 38 |         chapter_outline: Dict[str, Any],
 39 |         chapter_idx: int,
 40 |         characters: Dict[str, Any] = None,
 41 |         previous_scene: str = "",
 42 |         sync_info: Optional[str] = None
 43 |     ) -> Tuple[str, bool, int]:
 44 |         """
 45 |         检查章节内容一致性，并返回检查报告和是否需要修改
 46 |         
 47 |         Args:
 48 |             chapter_content: 待检查章节内容
 49 |             chapter_outline: 章节大纲
 50 |             chapter_idx: 章节索引
 51 |             characters: 角色信息字典（可选）
 52 |             previous_scene: 前一章的场景信息（可选）
 53 |             sync_info: 同步信息（替代 global_summary）
 54 |             
 55 |         Returns:
 56 |             tuple: (检查报告, 是否需要修改, 评分)
 57 |         """
 58 |         logging.info(f"第 {chapter_idx + 1} 章: 开始一致性检查...")
 59 |         
 60 |         # 获取上一章摘要（保留）
 61 |         previous_summary = self._get_previous_summary(chapter_idx)
 62 |         
 63 |         # 角色信息获取已注释掉，使用空字符串
 64 |         character_info = ""
 65 |         
 66 |         # 生成一致性检查的提示词 - 移除 global_summary，仅传递 sync_info
 67 |         prompt = prompts.get_consistency_check_prompt(
 68 |             chapter_content=chapter_content,
 69 |             chapter_outline=chapter_outline,
 70 |             previous_summary=previous_summary,
 71 |             character_info=character_info,
 72 |             previous_scene=previous_scene,
 73 |             sync_info=sync_info  # 使用 sync_info 替代 global_summary
 74 |         )
 75 |         
 76 |         # 调用模型进行检查
 77 |         try:
 78 |             check_result = self.content_model.generate(prompt)
 79 |             
 80 |             # 解析检查结果
 81 |             needs_revision = "需要修改" in check_result
 82 |             
 83 |             # 提取分数
 84 |             score_match = re.search(r'\[总体评分\]\s*:\s*(\d+)', check_result)
 85 |             score = int(score_match.group(1)) if score_match else 0
 86 |             
 87 |             logging.info(f"第 {chapter_idx + 1} 章: 一致性检查完成，得分: {score}，{'需要修改' if needs_revision else '无需修改'}")
 88 |             
 89 |             return check_result, needs_revision, score
 90 |         
 91 |         except Exception as e:
 92 |             logging.error(f"第 {chapter_idx + 1} 章: 一致性检查出错: {str(e)}")
 93 |             return "一致性检查出错", True, 0
 94 |     
 95 |     def revise_chapter(
 96 |         self,
 97 |         chapter_content: str,
 98 |         consistency_report: str,
 99 |         chapter_outline: Dict[str, Any],
100 |         chapter_idx: int
101 |     ) -> str:
102 |         """
103 |         根据一致性检查报告修正章节内容
104 |         
105 |         Args:
106 |             chapter_content: 原章节内容
107 |             consistency_report: 一致性检查报告
108 |             chapter_outline: 章节大纲
109 |             chapter_idx: 章节索引
110 |             
111 |         Returns:
112 |             str: 修正后的章节内容
113 |         """
114 |         logging.info(f"第 {chapter_idx + 1} 章: 开始根据一致性检查报告修正内容...")
115 |         
116 |         # 获取上一章摘要
117 |         previous_summary = self._get_previous_summary(chapter_idx)
118 |         
119 |         # 生成修正提示词
120 |         prompt = prompts.get_chapter_revision_prompt(
121 |             original_content=chapter_content,
122 |             consistency_report=consistency_report,
123 |             chapter_outline=chapter_outline,
124 |             previous_summary=previous_summary
125 |         )
126 |         
127 |         # 调用模型进行修正
128 |         try:
129 |             revised_content = self.content_model.generate(prompt)
130 |             logging.info(f"第 {chapter_idx + 1} 章: 内容修正完成")
131 |             return revised_content
132 |         except Exception as e:
133 |             logging.error(f"第 {chapter_idx + 1} 章: 内容修正出错: {str(e)}")
134 |             return chapter_content  # 修正失败时返回原内容
135 |     
136 |     def ensure_chapter_consistency(
137 |         self,
138 |         chapter_content: str,
139 |         chapter_outline: Dict[str, Any],
140 |         chapter_idx: int,
141 |         characters: Dict[str, Any] = None,
142 |         previous_scene: str = "",
143 |         sync_info: Optional[str] = None
144 |     ) -> str:
145 |         """
146 |         确保章节内容的一致性，进行必要的检查和修正
147 |         
148 |         Args:
149 |             chapter_content: 章节内容
150 |             chapter_outline: 章节大纲
151 |             chapter_idx: 章节索引
152 |             characters: 角色信息字典（可选）
153 |             previous_scene: 前一章的场景信息（可选）
154 |             sync_info: 同步信息（可选）
155 |         """
156 |         # 进行一致性检查和修正的循环
157 |         for attempt in range(self.max_revision_attempts):
158 |             # 进行一致性检查 - 传递 previous_scene 和 sync_info
159 |             consistency_report, needs_revision, score = self.check_chapter_consistency(
160 |                 chapter_content, chapter_outline, chapter_idx, characters, previous_scene, sync_info
161 |             )
162 |             
163 |             # 如果分数达标或不需要修改，则跳出循环
164 |             if score >= self.min_acceptable_score or not needs_revision:
165 |                 logging.info(f"第 {chapter_idx + 1} 章: 内容一致性检查通过，得分: {score}")
166 |                 break
167 |                 
168 |             # 否则进行修正
169 |             logging.info(f"第 {chapter_idx + 1} 章: 第 {attempt + 1} 次修正尝试，当前分数: {score}")
170 |             chapter_content = self.revise_chapter(
171 |                 chapter_content, consistency_report, chapter_outline, chapter_idx
172 |             )
173 |             
174 |             # 如果是最后一次尝试，再次检查但不再修改
175 |             if attempt == self.max_revision_attempts - 1:
176 |                 final_report, _, final_score = self.check_chapter_consistency(
177 |                     chapter_content, chapter_outline, chapter_idx, characters, previous_scene, sync_info
178 |                 )
179 |                 logging.info(f"第 {chapter_idx + 1} 章: 完成所有修正尝试，最终分数: {final_score}")
180 |         
181 |         return chapter_content
182 |     
183 |     def _get_global_summary(self, chapter_idx: int) -> str:
184 |         """获取全局摘要"""
185 |         method_name = "_get_global_summary" # For logging clarity
186 |         logging.debug(f"[{method_name}] Called for chapter_idx: {chapter_idx}")
187 |         global_summary = ""
188 |         summary_file = os.path.join(self.output_dir, "summary.json")
189 |         logging.debug(f"[{method_name}] Summary file path: {summary_file}")
190 |         # 检查摘要文件是否存在
191 |         if os.path.exists(summary_file):
192 |             logging.debug(f"[{method_name}] Summary file exists.")
193 |             try:
194 |                 logging.debug(f"[{method_name}] Entering try block to read summary file.")
195 |                 # 打开并读取摘要文件
196 |                 with open(summary_file, 'r', encoding='utf-8') as f:
197 |                     # 首先加载摘要文件内容到 summaries 字典
198 |                     logging.debug(f"[{method_name}] Loading JSON from file...")
199 |                     summaries = json.load(f)
200 |                     logging.debug(f"[{method_name}] JSON loaded. Type: {type(summaries)}. Content (first 500 chars): {str(summaries)[:500]}")
201 | 
202 |                     # 确保 summaries 是字典
203 |                     if not isinstance(summaries, dict):
204 |                          logging.error(f"[{method_name}] Loaded summaries is not a dictionary! Type: {type(summaries)}")
205 |                          return "" # 返回空字符串，避免后续错误
206 | 
207 |                     # 全局摘要可以考虑组合多个章节的摘要
208 |                     if len(summaries) > 0:
209 |                         logging.debug(f"[{method_name}] Processing summaries dictionary...")
210 |                         # 使用列表推导式构建摘要列表
211 |                         summary_parts = []
212 |                         for k, v in summaries.items():
213 |                             logging.debug(f"[{method_name}] Checking summary key: '{k}'")
214 |                             try:
215 |                                 # 尝试将 key 转换为整数进行比较
216 |                                 if int(k) < chapter_idx:
217 |                                     logging.debug(f"[{method_name}] Key '{k}' is valid and less than {chapter_idx}. Adding value.")
218 |                                     summary_parts.append(v)
219 |                                 else:
220 |                                      logging.debug(f"[{method_name}] Key '{k}' is not less than {chapter_idx}. Skipping.")
221 |                             except ValueError:
222 |                                 # 如果 key 不能转换为整数，记录警告并跳过
223 |                                 logging.warning(f"[{method_name}] Summary key '{k}' is not a valid integer. Skipping.")
224 |                         # 组合摘要并截取最后 2000 字符
225 |                         global_summary = "\n".join(summary_parts)[-2000:]
226 |                         logging.debug(f"[{method_name}] Combined global_summary (first 100 chars): '{global_summary[:100]}'")
227 |                     else:
228 |                         logging.debug(f"[{method_name}] Summaries dictionary is empty.")
229 | 
230 |             # 使用更具体的异常处理
231 |             except json.JSONDecodeError as e:
232 |                 logging.error(f"[{method_name}] 解析摘要文件 {summary_file} 失败: {e}")
233 |             except Exception as e:
234 |                 # Log the full traceback for unexpected errors
235 |                 logging.error(f"[{method_name}] 读取全局摘要时发生未知错误: {str(e)}", exc_info=True) # 添加 exc_info=True
236 |         else:
237 |             logging.warning(f"[{method_name}] Summary file does not exist: {summary_file}")
238 | 
239 |         # 返回获取到的全局摘要（可能为空字符串）
240 |         logging.debug(f"[{method_name}] Returning global_summary (first 100 chars): '{global_summary[:100]}'")
241 |         return global_summary
242 |     
243 |     def _get_previous_summary(self, chapter_idx: int) -> str:
244 |         """获取上一章摘要"""
245 |         method_name = "_get_previous_summary" # For logging clarity
246 |         logging.debug(f"[{method_name}] Called for chapter_idx: {chapter_idx}")
247 |         previous_summary = ""
248 |         # 检查 chapter_idx 是否大于 0，确保有上一章
249 |         if chapter_idx > 0:
250 |             summary_file = os.path.join(self.output_dir, "summary.json")
251 |             logging.debug(f"[{method_name}] Summary file path: {summary_file}")
252 |             # 检查摘要文件是否存在
253 |             if os.path.exists(summary_file):
254 |                 logging.debug(f"[{method_name}] Summary file exists.")
255 |                 try:
256 |                     logging.debug(f"[{method_name}] Entering try block to read summary file.")
257 |                     # 打开并读取摘要文件
258 |                     with open(summary_file, 'r', encoding='utf-8') as f:
259 |                         # 首先加载摘要文件内容到 summaries 字典
260 |                         logging.debug(f"[{method_name}] Loading JSON from file...")
261 |                         summaries = json.load(f)
262 |                         logging.debug(f"[{method_name}] JSON loaded. Type: {type(summaries)}. Content (first 500 chars): {str(summaries)[:500]}")
263 | 
264 |                         # 确保 summaries 是字典
265 |                         if not isinstance(summaries, dict):
266 |                              logging.error(f"[{method_name}] Loaded summaries is not a dictionary! Type: {type(summaries)}")
267 |                              # 返回空字符串，避免后续错误
268 |                              return ""
269 | 
270 |                         # 正确获取上一章的 key (章节索引从 0 开始，章节号从 1 开始)
271 |                         prev_chapter_num_str = str(chapter_idx) # 上一章的章节号是 chapter_idx
272 |                         logging.debug(f"[{method_name}] Previous chapter key to lookup: '{prev_chapter_num_str}'")
273 | 
274 |                         # 使用 .get() 安全访问，如果 key 不存在则返回空字符串
275 |                         logging.debug(f"[{method_name}] Attempting to get summary for key '{prev_chapter_num_str}' using .get()")
276 |                         previous_summary = summaries.get(prev_chapter_num_str, "")
277 |                         logging.debug(f"[{method_name}] .get() returned. previous_summary is now (first 100 chars): '{previous_summary[:100]}'")
278 | 
279 |                         # 如果未找到摘要，记录警告
280 |                         if not previous_summary:
281 |                             logging.warning(f"[{method_name}] 未能找到第 {prev_chapter_num_str} 章的摘要。")
282 | 
283 |                 # 使用更具体的异常处理
284 |                 except json.JSONDecodeError as e:
285 |                     logging.error(f"[{method_name}] 解析摘要文件 {summary_file} 失败: {e}")
286 |                 except Exception as e:
287 |                     # Log the full traceback for unexpected errors
288 |                     logging.error(f"[{method_name}] 读取上一章摘要时发生未知错误: {str(e)}", exc_info=True) # 添加 exc_info=True
289 |             else:
290 |                 logging.warning(f"[{method_name}] Summary file does not exist: {summary_file}")
291 |         else:
292 |             logging.debug(f"[{method_name}] chapter_idx is 0, no previous summary to get.")
293 | 
294 |         # 返回获取到的摘要（可能为空字符串）
295 |         logging.debug(f"[{method_name}] Returning previous_summary (first 100 chars): '{previous_summary[:100]}'")
296 |         return previous_summary
297 |     
298 |     # def _get_previous_scene(self, chapter_idx: int) -> str:
299 |     #     ...
300 |     
301 |     # def _get_character_info(self, characters: Dict[str, Any], chapter_outline: Dict[str, Any]) -> str:
302 |     #     ... 


--------------------------------------------------------------------------------
/src/generators/content/validators.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 验证器模块 - 负责处理小说内容的各类验证
  3 | 
  4 | 此模块提供了以下验证功能：
  5 | 1. 逻辑严密性验证
  6 | 2. 重复文字验证
  7 | """
  8 | 
  9 | import logging
 10 | from typing import Dict, List, Tuple, Optional
 11 | import re
 12 | from .. import prompts
 13 | 
 14 | class LogicValidator:
 15 |     """逻辑严密性验证器"""
 16 |     
 17 |     def __init__(self, content_model):
 18 |         self.content_model = content_model
 19 |     
 20 |     def check_logic(
 21 |         self, 
 22 |         chapter_content: str, 
 23 |         chapter_outline: Dict, 
 24 |         sync_info: Optional[str] = None
 25 |     ) -> Tuple[str, bool]:
 26 |         """
 27 |         检查章节内容的逻辑严密性
 28 |         
 29 |         Args:
 30 |             chapter_content: 章节内容
 31 |             chapter_outline: 章节大纲
 32 |             sync_info: 同步信息（可选）
 33 |             
 34 |         Returns:
 35 |             tuple: (验证报告, 是否需要修改)
 36 |         """
 37 |         prompt = prompts.get_logic_check_prompt(
 38 |             chapter_content=chapter_content,
 39 |             chapter_outline=chapter_outline,
 40 |             sync_info=sync_info
 41 |         )
 42 |         
 43 |         try:
 44 |             check_result = self.content_model.generate(prompt)
 45 |             needs_revision = "需要修改" in check_result
 46 |             return check_result, needs_revision
 47 |         except Exception as e:
 48 |             logging.error(f"逻辑验证失败: {str(e)}")
 49 |             return "逻辑验证出错", True
 50 | 
 51 | class DuplicateValidator:
 52 |     """重复文字验证器"""
 53 |     
 54 |     def __init__(self, content_model):
 55 |         self.content_model = content_model
 56 |         self.min_duplicate_length = 50  # 最小重复文字长度
 57 |         self.max_duplicate_ratio = 0.3  # 最大允许重复比例
 58 |     
 59 |     def check_duplicates(
 60 |         self,
 61 |         chapter_content: str,
 62 |         prev_content: str = "",
 63 |         next_content: str = ""
 64 |     ) -> Tuple[str, bool]:
 65 |         """
 66 |         检查章节内容的重复文字
 67 |         
 68 |         Args:
 69 |             chapter_content: 当前章节内容
 70 |             prev_content: 上一章内容
 71 |             next_content: 下一章内容
 72 |             
 73 |         Returns:
 74 |             tuple: (验证报告, 是否需要修改)
 75 |         """
 76 |         # 1. 检查章节内部重复
 77 |         internal_duplicates = self._find_internal_duplicates(chapter_content)
 78 |         
 79 |         # 2. 检查与前后章节的重复
 80 |         cross_chapter_duplicates = self._find_cross_chapter_duplicates(
 81 |             chapter_content, prev_content, next_content
 82 |         )
 83 |         
 84 |         # 3. 生成验证报告
 85 |         report = self._generate_report(internal_duplicates, cross_chapter_duplicates)
 86 |         
 87 |         # 4. 判断是否需要修改
 88 |         needs_revision = (
 89 |             len(internal_duplicates) > 0 or
 90 |             len(cross_chapter_duplicates) > 0
 91 |         )
 92 |         
 93 |         return report, needs_revision
 94 |     
 95 |     def _find_internal_duplicates(self, content: str) -> List[Tuple[str, int, int]]:
 96 |         """查找章节内部的重复文字"""
 97 |         duplicates = []
 98 |         content_length = len(content)
 99 |         
100 |         # 使用滑动窗口查找重复片段
101 |         for length in range(self.min_duplicate_length, content_length // 2):
102 |             for start in range(content_length - length * 2):
103 |                 pattern = content[start:start + length]
104 |                 # 在当前片段之后查找相同内容
105 |                 next_start = start + length
106 |                 while True:
107 |                     next_start = content.find(pattern, next_start)
108 |                     if next_start == -1:
109 |                         break
110 |                     duplicates.append((pattern, start, next_start))
111 |                     next_start += 1
112 |         
113 |         return duplicates
114 |     
115 |     def _find_cross_chapter_duplicates(
116 |         self,
117 |         current_content: str,
118 |         prev_content: str,
119 |         next_content: str
120 |     ) -> List[Tuple[str, str, int, int]]:
121 |         """查找与前后章节的重复文字"""
122 |         duplicates = []
123 |         
124 |         # 检查与上一章的重复
125 |         if prev_content:
126 |             for length in range(self.min_duplicate_length, len(current_content) // 2):
127 |                 for start in range(len(current_content) - length):
128 |                     pattern = current_content[start:start + length]
129 |                     if pattern in prev_content:
130 |                         duplicates.append(("prev", pattern, start, prev_content.find(pattern)))
131 |         
132 |         # 检查与下一章的重复
133 |         if next_content:
134 |             for length in range(self.min_duplicate_length, len(current_content) // 2):
135 |                 for start in range(len(current_content) - length):
136 |                     pattern = current_content[start:start + length]
137 |                     if pattern in next_content:
138 |                         duplicates.append(("next", pattern, start, next_content.find(pattern)))
139 |         
140 |         return duplicates
141 |     
142 |     def _generate_report(
143 |         self,
144 |         internal_duplicates: List[Tuple[str, int, int]],
145 |         cross_chapter_duplicates: List[Tuple[str, str, int, int]]
146 |     ) -> str:
147 |         """生成验证报告"""
148 |         report = "重复文字验证报告\n\n"
149 |         
150 |         # 内部重复报告
151 |         if internal_duplicates:
152 |             report += "1. 章节内部重复：\n"
153 |             for pattern, start1, start2 in internal_duplicates:
154 |                 report += f"- 重复内容：{pattern}\n"
155 |                 report += f"  位置：{start1} 和 {start2}\n"
156 |         else:
157 |             report += "1. 章节内部重复：未发现\n"
158 |         
159 |         # 跨章节重复报告
160 |         if cross_chapter_duplicates:
161 |             report += "\n2. 跨章节重复：\n"
162 |             for chapter, pattern, start1, start2 in cross_chapter_duplicates:
163 |                 chapter_name = "上一章" if chapter == "prev" else "下一章"
164 |                 report += f"- 与{chapter_name}重复：{pattern}\n"
165 |                 report += f"  位置：当前章节 {start1}，{chapter_name} {start2}\n"
166 |         else:
167 |             report += "\n2. 跨章节重复：未发现\n"
168 |         
169 |         # 统计信息
170 |         total_duplicates = len(internal_duplicates) + len(cross_chapter_duplicates)
171 |         report += f"\n总计发现 {total_duplicates} 处重复\n"
172 |         
173 |         return report 


--------------------------------------------------------------------------------
/src/generators/finalizer/finalizer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | import re
  4 | import string
  5 | import random
  6 | import json
  7 | from typing import Optional, Set, Dict, List
  8 | # from opencc import OpenCC # Keep if used elsewhere, otherwise remove
  9 | from ..common.data_structures import Character, ChapterOutline # Keep if Character is used later
 10 | from ..common.utils import load_json_file, save_json_file, clean_text, validate_directory
 11 | # --- Import the correct prompt function ---
 12 | from .. import prompts # Import the prompts module
 13 | 
 14 | # Get logger
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | class NovelFinalizer:
 18 |     def __init__(self, config, content_model, knowledge_base):
 19 |         self.config = config
 20 |         self.content_model = content_model
 21 |         self.knowledge_base = knowledge_base
 22 |         self.output_dir = config.output_config["output_dir"]
 23 |         
 24 |         # 验证并创建输出目录
 25 |         validate_directory(self.output_dir)
 26 | 
 27 |     def finalize_chapter(self, chapter_num: int, update_characters: bool = False, update_summary: bool = True) -> bool:
 28 |         """处理章节的定稿工作
 29 |         
 30 |         Args:
 31 |             chapter_num: 要处理的章节号
 32 |             update_characters: 是否更新角色状态
 33 |             update_summary: 是否更新章节摘要
 34 |             
 35 |         Returns:
 36 |             bool: 处理是否成功
 37 |         """
 38 |         logger.info(f"开始定稿第 {chapter_num} 章...")
 39 |         try:
 40 |             # Load outline to get the title for the filename
 41 |             outline_file = os.path.join(self.output_dir, "outline.json")
 42 |             logger.info(f"实际读取的大纲文件路径: {outline_file}")
 43 |             if not os.path.exists(outline_file):
 44 |                 logger.error(f"无法找到大纲文件: {outline_file}")
 45 |                 return False
 46 | 
 47 |             outline_data = load_json_file(outline_file, default_value={})
 48 |             # Handle both dict {chapters: []} and list [] formats
 49 |             chapters_list = []
 50 |             if isinstance(outline_data, dict) and "chapters" in outline_data and isinstance(outline_data["chapters"], list):
 51 |                  chapters_list = outline_data["chapters"]
 52 |             elif isinstance(outline_data, list):
 53 |                  chapters_list = outline_data
 54 |             else:
 55 |                  logger.error(f"无法识别的大纲文件格式: {outline_file}")
 56 |                  return False
 57 | 
 58 |             if not (1 <= chapter_num <= len(chapters_list)):
 59 |                 logger.error(f"章节号 {chapter_num} 超出大纲范围 (1-{len(chapters_list)})")
 60 |                 return False
 61 | 
 62 |             chapter_outline_data = chapters_list[chapter_num - 1]
 63 |             if not isinstance(chapter_outline_data, dict):
 64 |                  logger.error(f"第 {chapter_num} 章的大纲条目不是有效的字典格式。")
 65 |                  return False
 66 | 
 67 |             title = chapter_outline_data.get('title', f'无标题章节{chapter_num}') # Default title if missing
 68 |             cleaned_title = self._clean_filename(title) # Use helper method
 69 | 
 70 |             # Construct the chapter filename
 71 |             chapter_file = os.path.join(self.output_dir, f"第{chapter_num}章_{cleaned_title}.txt")
 72 |             logger.debug(f"尝试读取章节文件: {chapter_file}")
 73 | 
 74 |             if not os.path.exists(chapter_file):
 75 |                 logger.error(f"章节文件不存在: {chapter_file}")
 76 |                 return False
 77 | 
 78 |             with open(chapter_file, 'r', encoding='utf-8') as f:
 79 |                 content = f.read()
 80 |             logger.debug(f"成功读取章节 {chapter_num} 内容，长度: {len(content)}")
 81 |             
 82 |             # Generate/update summary
 83 |             if update_summary:
 84 |                 logger.info(f"开始更新第 {chapter_num} 章摘要...")
 85 |                 
 86 |                 # 先尝试重新生成指定章节的摘要文件（使用已有摘要）
 87 |                 if not self._regenerate_chapter_summary_file(chapter_num, content):
 88 |                     logger.warning(f"重新生成第 {chapter_num} 章摘要文件失败")
 89 |                     
 90 |                     # 如果摘要文件生成失败，再尝试更新summary.json
 91 |                     if not self._update_summary(chapter_num, content):
 92 |                         logger.error(f"更新第 {chapter_num} 章摘要失败")
 93 |                         return False
 94 |                 else:
 95 |                     # 摘要文件生成成功，确保summary.json也是最新的
 96 |                     self._update_summary(chapter_num, content)
 97 |                 
 98 |                 logger.info(f"第 {chapter_num} 章摘要更新成功。")
 99 |             
100 |             logging.info(f"第 {chapter_num} 章定稿完成")
101 |             
102 |             # 新增：自动仿写功能
103 |             if self._should_trigger_auto_imitation(chapter_num):
104 |                 logger.info(f"章节号 {chapter_num} 触发自动仿写...")
105 |                 # 使用 imitation_model
106 |                 imitation_model_config = self.config.get_imitation_model()
107 |                 if imitation_model_config["type"] == "gemini":
108 |                     imitation_model = self.content_model.__class__(imitation_model_config)
109 |                 elif imitation_model_config["type"] == "openai":
110 |                     from src.models.openai_model import OpenAIModel
111 |                     imitation_model = OpenAIModel(imitation_model_config)
112 |                 elif imitation_model_config["type"] == "volcengine":
113 |                     from src.models.openai_model import OpenAIModel
114 |                     imitation_model = OpenAIModel(imitation_model_config)  # 火山引擎复用OpenAI兼容实现
115 |                 else:
116 |                     logger.error(f"不支持的模型类型: {imitation_model_config['type']}")
117 |                     imitation_model = self.content_model
118 |                 if self._perform_auto_imitation(chapter_num, content, cleaned_title, imitation_model):
119 |                     logger.info(f"第 {chapter_num} 章自动仿写完成")
120 |                 else:
121 |                     logger.warning(f"第 {chapter_num} 章自动仿写失败，但不影响定稿流程")
122 |             
123 |             # 新增：定稿章节号为5的倍数时，自动更新sync_info.json（根据进度关系决定更新策略）
124 |             if chapter_num % 5 == 0:
125 |                 try:
126 |                     # 检查当前进度，避免用历史章节覆盖最新进度
127 |                     sync_info_file = os.path.join(self.output_dir, "sync_info.json")
128 |                     current_progress = self._get_current_progress(sync_info_file)
129 |                     
130 |                     if current_progress is None:
131 |                         # 如果没有现有进度，直接更新
132 |                         logger.info(f"章节号 {chapter_num} 为5的倍数，sync_info.json不存在或无进度记录，直接更新")
133 |                         self._update_sync_info_for_finalize(chapter_num)
134 |                     elif chapter_num < current_progress:
135 |                         # 定稿章节小于当前进度，不更新以保护最新进度
136 |                         logger.info(f"章节号 {chapter_num} 为5的倍数，但小于当前进度 {current_progress}，跳过sync_info.json更新以保护进度")
137 |                     elif chapter_num > current_progress:
138 |                         # 定稿章节大于当前进度，需要更新
139 |                         logger.info(f"章节号 {chapter_num} 为5的倍数，且大于当前进度 {current_progress}，更新sync_info.json")
140 |                         self._update_sync_info_for_finalize(chapter_num)
141 |                     else:
142 |                         # 定稿章节等于当前进度，备份后更新
143 |                         logger.info(f"章节号 {chapter_num} 为5的倍数，且等于当前进度 {current_progress}，备份原同步信息后更新")
144 |                         self._backup_sync_info(sync_info_file)
145 |                         self._update_sync_info_for_finalize(chapter_num)
146 |                         
147 |                 except Exception as sync_e:
148 |                     logger.error(f"章节号 {chapter_num} 为5的倍数，但自动更新sync_info.json失败: {sync_e}", exc_info=True)
149 |             
150 |             return True
151 |             
152 |         except Exception as e:
153 |             # Log the full traceback for unexpected errors
154 |             logger.error(f"处理章节 {chapter_num} 定稿时发生意外错误: {str(e)}", exc_info=True)
155 |             return False
156 | 
157 |     def _clean_filename(self, filename: str) -> str:
158 |         """清理字符串，使其适合作为文件名"""
159 |         # Remove common illegal characters
160 |         cleaned = re.sub(r'[\\/*?:"<>|]', "", str(filename)) # Ensure input is string
161 |         # Remove potentially problematic leading/trailing spaces or dots
162 |         cleaned = cleaned.strip(". ")
163 |         # Prevent overly long filenames (optional)
164 |         # max_len = 100
165 |         # if len(cleaned) > max_len:
166 |         #     name_part, ext = os.path.splitext(cleaned)
167 |         #     cleaned = name_part[:max_len-len(ext)-3] + "..." + ext
168 |         # Provide a default name if cleaned is empty
169 |         if not cleaned:
170 |             # Use chapter number if available, otherwise random int
171 |             # This method doesn't know the chapter number directly, so use random
172 |             return f"untitled_chapter_{random.randint(1000,9999)}"
173 |         return cleaned
174 | 
175 |     def _update_summary(self, chapter_num: int, content: str) -> bool:
176 |         """生成并更新章节摘要"""
177 |         try:
178 |             summary_file = os.path.join(self.output_dir, "summary.json")
179 |             # Load existing summaries safely
180 |             summaries = load_json_file(summary_file, default_value={})
181 |             if not isinstance(summaries, dict):
182 |                  logger.warning(f"摘要文件 {summary_file} 内容不是字典，将重新创建。")
183 |                  summaries = {}
184 | 
185 |             # Generate new summary
186 |             # Limit content length to avoid excessive prompt size/cost
187 |             max_content_for_summary = self.config.generation_config.get("summary_max_content_length", 4000)
188 |             # --- Call the imported prompt function ---
189 |             prompt = prompts.get_summary_prompt(content[:max_content_for_summary])
190 |             # --- End of change ---
191 |             logger.debug(f"为第 {chapter_num} 章生成摘要的提示词 (前100字符): {prompt[:100]}...")
192 |             new_summary = self.content_model.generate(prompt)
193 | 
194 |             if not new_summary or not new_summary.strip():
195 |                  logger.error(f"模型未能为第 {chapter_num} 章生成有效摘要。")
196 |                  return False # Treat empty summary as failure
197 | 
198 |             # Clean the summary text
199 |             cleaned_summary = self._clean_summary(new_summary)
200 |             logger.debug(f"第 {chapter_num} 章生成的原始摘要 (前100字符): {new_summary[:100]}...")
201 |             logger.debug(f"第 {chapter_num} 章清理后的摘要 (前100字符): {cleaned_summary[:100]}...")
202 | 
203 |             # Update the summaries dictionary
204 |             summaries[str(chapter_num)] = cleaned_summary # Use string key
205 | 
206 |             # Save updated summaries
207 |             if save_json_file(summary_file, summaries):
208 |                 # logger.info(f"已更新第 {chapter_num} 章摘要") # Moved success log to finalize_chapter
209 |                 return True
210 |             else:
211 |                  logger.error(f"保存摘要文件 {summary_file} 失败。")
212 |                  return False
213 | 
214 |         except Exception as e:
215 |             logger.error(f"更新第 {chapter_num} 章摘要时出错: {str(e)}", exc_info=True)
216 |             return False
217 | 
218 |     def _clean_summary(self, summary: str) -> str:
219 |         """清理摘要文本，移除常见的前缀、格式和多余空白"""
220 |         if not summary:
221 |             return ""
222 | 
223 |         cleaned_summary = summary.strip() # Initial trim
224 | 
225 |         # Patterns to remove at the beginning (case-insensitive)
226 |         patterns_to_remove = [
227 |             r"^\s*好的，根据你提供的内容，以下是章节摘要[:：\s]*",
228 |             r"^\s*好的，这是章节摘要[:：\s]*",
229 |             r"^\s*以下是章节摘要[:：\s]*",
230 |             r"^\s*章节摘要[:：\s]*",
231 |             r"^\s*摘要[:：\s]*",
232 |             r"^\s*\*\*摘要[:：\s]*\*\*", # Handle markdown bold
233 |             r"^\s*本章讲述了?[:：\s]*",
234 |             r"^\s*本章主要讲述了?[:：\s]*",
235 |             r"^\s*本章描述了?[:：\s]*",
236 |             r"^\s*本章主要描述了?[:：\s]*",
237 |             r"^\s*本章叙述了?[:：\s]*",
238 |             r"^\s*本章主要叙述了?[:：\s]*",
239 |             r"^\s*本章介绍了?[:：\s]*",
240 |             r"^\s*本章主要介绍了?[:：\s]*",
241 |             r"^\s*这一章?节?主要[:：\s]*",
242 |             r"^\s*本章内容摘要如下[:：\s]*",
243 |             # Add more patterns as needed
244 |         ]
245 | 
246 |         # Remove patterns iteratively
247 |         for pattern in patterns_to_remove:
248 |             # Use re.IGNORECASE for case-insensitivity
249 |             # Use re.DOTALL in case newlines are part of the pattern
250 |             cleaned_summary = re.sub(pattern, "", cleaned_summary, flags=re.IGNORECASE | re.DOTALL).strip()
251 | 
252 |         # Final trim to remove any leading/trailing whitespace possibly left by removal
253 |         cleaned_summary = cleaned_summary.strip()
254 | 
255 |         return cleaned_summary
256 | 
257 |     def _should_trigger_auto_imitation(self, chapter_num: int) -> bool:
258 |         """判断是否应该触发自动仿写"""
259 |         try:
260 |             # 检查仿写功能是否启用
261 |             imitation_config = getattr(self.config, 'imitation_config', {})
262 |             if not imitation_config.get('enabled', False):
263 |                 return False
264 |             
265 |             auto_config = imitation_config.get('auto_imitation', {})
266 |             if not auto_config.get('enabled', False):
267 |                 return False
268 |             
269 |             # 检查是否开启全局仿写
270 |             trigger_all_chapters = auto_config.get('trigger_all_chapters', False)
271 |             if trigger_all_chapters:
272 |                 return True
273 |             
274 |             # 兼容旧配置：检查章节号是否在触发列表中
275 |             trigger_chapters = auto_config.get('trigger_chapters', [])
276 |             if trigger_chapters:
277 |                 return chapter_num in trigger_chapters
278 |             
279 |             return False
280 |             
281 |         except Exception as e:
282 |             logger.error(f"检查自动仿写触发条件时出错: {e}")
283 |             return False
284 | 
285 |     def _perform_auto_imitation(self, chapter_num: int, content: str, cleaned_title: str, imitation_model=None) -> bool:
286 |         """执行自动仿写"""
287 |         try:
288 |             imitation_config = getattr(self.config, 'imitation_config', {})
289 |             auto_config = imitation_config.get('auto_imitation', {})
290 |             
291 |             # 获取默认风格
292 |             default_style_name = auto_config.get('default_style', '古风雅致')
293 |             style_sources = auto_config.get('style_sources', [])
294 |             
295 |             # 查找默认风格配置
296 |             default_style = None
297 |             for style in style_sources:
298 |                 if style.get('name') == default_style_name:
299 |                     default_style = style
300 |                     break
301 |             
302 |             if not default_style:
303 |                 logger.error(f"未找到默认风格配置: {default_style_name}")
304 |                 return False
305 |             
306 |             # 读取风格源文件
307 |             style_file_path = default_style.get('file_path')
308 |             if not os.path.exists(style_file_path):
309 |                 logger.error(f"风格源文件不存在: {style_file_path}")
310 |                 return False
311 |             
312 |             with open(style_file_path, 'r', encoding='utf-8') as f:
313 |                 style_text = f.read()
314 |             
315 |             # 构建临时知识库
316 |             temp_kb_config = {
317 |                 "chunk_size": 1200,
318 |                 "chunk_overlap": 300,
319 |                 "cache_dir": imitation_config.get('manual_imitation', {}).get('temp_kb_cache_dir', 'data/cache/imitation_cache')
320 |             }
321 |             
322 |             # 创建临时知识库
323 |             temp_kb = self.knowledge_base.__class__(temp_kb_config, self.knowledge_base.embedding_model)
324 |             temp_kb.build(style_text, force_rebuild=False)
325 |             
326 |             # 检索风格范例
327 |             style_examples = temp_kb.search(content, k=3)
328 |             
329 |             # 生成仿写提示词
330 |             extra_prompt = default_style.get('extra_prompt', '')
331 |             prompt = prompts.get_imitation_prompt(content, style_examples, extra_prompt)
332 |             
333 |             # 调用 imitation_model 生成仿写内容
334 |             model = imitation_model if imitation_model is not None else self.content_model
335 |             imitated_content = model.generate(prompt)
336 |             
337 |             if not imitated_content or not imitated_content.strip():
338 |                 logger.error(f"模型未能生成有效的仿写内容")
339 |                 return False
340 |             
341 |             # 保存仿写结果
342 |             output_suffix = auto_config.get('output_suffix', '_imitated')
343 |             imitated_file = os.path.join(self.output_dir, f"第{chapter_num}章_{cleaned_title}{output_suffix}.txt")
344 |             
345 |             # 如果需要备份原文件
346 |             if auto_config.get('backup_original', True):
347 |                 original_file = os.path.join(self.output_dir, f"第{chapter_num}章_{cleaned_title}.txt")
348 |                 backup_file = os.path.join(self.output_dir, f"第{chapter_num}章_{cleaned_title}_original.txt")
349 |                 if os.path.exists(original_file):
350 |                     import shutil
351 |                     shutil.copy2(original_file, backup_file)
352 |                     logger.info(f"已备份原文件到: {backup_file}")
353 |             
354 |             # 保存仿写结果
355 |             with open(imitated_file, 'w', encoding='utf-8') as f:
356 |                 f.write(imitated_content)
357 |             
358 |             logger.info(f"仿写结果已保存到: {imitated_file}")
359 |             return True
360 |             
361 |         except Exception as e:
362 |             logger.error(f"执行自动仿写时出错: {e}", exc_info=True)
363 |             return False
364 | 
365 |     def _regenerate_chapter_summary_file(self, chapter_num: int, content: str) -> bool:
366 |         """重新生成指定章节的摘要文件"""
367 |         try:
368 |             # 从summary.json中获取已生成的摘要，避免重复生成
369 |             summary_file = os.path.join(self.output_dir, "summary.json")
370 |             if os.path.exists(summary_file):
371 |                 summaries = load_json_file(summary_file, default_value={})
372 |                 chapter_key = str(chapter_num)
373 |                 
374 |                 if chapter_key in summaries:
375 |                     # 使用已生成的摘要
376 |                     summary_content = summaries[chapter_key]
377 |                     logger.info(f"使用已生成的第 {chapter_num} 章摘要")
378 |                 else:
379 |                     # 如果summary.json中没有，则重新生成
380 |                     max_content_for_summary = self.config.generation_config.get("summary_max_content_length", 4000)
381 |                     prompt = prompts.get_summary_prompt(content[:max_content_for_summary])
382 |                     new_summary = self.content_model.generate(prompt)
383 | 
384 |                     if not new_summary or not new_summary.strip():
385 |                         logger.error(f"模型未能为第 {chapter_num} 章生成有效摘要。")
386 |                         return False
387 | 
388 |                     summary_content = self._clean_summary(new_summary)
389 |             else:
390 |                 # 如果summary.json不存在，则生成新摘要
391 |                 max_content_for_summary = self.config.generation_config.get("summary_max_content_length", 4000)
392 |                 prompt = prompts.get_summary_prompt(content[:max_content_for_summary])
393 |                 new_summary = self.content_model.generate(prompt)
394 | 
395 |                 if not new_summary or not new_summary.strip():
396 |                     logger.error(f"模型未能为第 {chapter_num} 章生成有效摘要。")
397 |                     return False
398 | 
399 |                 summary_content = self._clean_summary(new_summary)
400 |             
401 |             # 保存到单独的摘要文件
402 |             summary_filename = f"第{chapter_num}章_摘要.txt"
403 |             summary_file_path = os.path.join(self.output_dir, summary_filename)
404 |             
405 |             with open(summary_file_path, 'w', encoding='utf-8') as f:
406 |                 f.write(summary_content)
407 |             
408 |             logger.info(f"已重新生成第 {chapter_num} 章摘要文件: {summary_file_path}")
409 |             return True
410 |             
411 |         except Exception as e:
412 |             logger.error(f"重新生成第 {chapter_num} 章摘要文件时出错: {str(e)}", exc_info=True)
413 |             return False
414 | 
415 |     def _get_current_progress(self, sync_info_file: str) -> Optional[int]:
416 |         """获取当前进度"""
417 |         try:
418 |             if not os.path.exists(sync_info_file):
419 |                 return None
420 | 
421 |             with open(sync_info_file, 'r', encoding='utf-8') as f:
422 |                 sync_info = json.load(f)
423 | 
424 |             current_chapter = sync_info.get("当前章节")
425 |             if current_chapter is not None:
426 |                 return int(current_chapter)
427 |             return None
428 |             
429 |         except Exception as e:
430 |             logger.warning(f"获取当前进度时出错: {e}")
431 |             return None
432 | 
433 |     def _backup_sync_info(self, sync_info_file: str) -> bool:
434 |         """备份同步信息文件"""
435 |         try:
436 |             if not os.path.exists(sync_info_file):
437 |                 logger.warning(f"同步信息文件不存在，无需备份: {sync_info_file}")
438 |                 return True
439 |             
440 |             import time
441 |             timestamp = time.strftime("%Y%m%d_%H%M%S")
442 |             backup_file = f"{sync_info_file}.backup_{timestamp}"
443 |             
444 |             import shutil
445 |             shutil.copy2(sync_info_file, backup_file)
446 |             logger.info(f"已备份同步信息文件到: {backup_file}")
447 |             return True
448 |             
449 |         except Exception as e:
450 |             logger.error(f"备份同步信息文件失败: {e}")
451 |             return False
452 | 
    def _update_sync_info_for_finalize(self, chapter_num: int) -> bool:
        """Refresh the sync info after a chapter has been finalized.

        Delegates to a throwaway ``ContentGenerator``: it is created from this
        finalizer's config, content model and knowledge base, pointed at
        ``chapter_num``, asked to load the outline, and then asked to run its
        sync-info update with the content model.

        Args:
            chapter_num: Chapter number assigned to the generator's
                ``current_chapter`` before the update is triggered.

        Returns:
            True when every step completed without raising; False otherwise
            (the exception is logged with its traceback).
        """
        try:
            # Imported inside the method rather than at module level
            # (presumably to avoid a circular import — TODO confirm).
            from ..content.content_generator import ContentGenerator
            # Temporary ContentGenerator used only to update the sync info.
            temp_content_gen = ContentGenerator(self.config, self.content_model, self.knowledge_base)
            temp_content_gen.current_chapter = chapter_num
            temp_content_gen._load_outline()  # load the outline explicitly before triggering the update
            temp_content_gen._trigger_sync_info_update(self.content_model)
            logger.info(f"finalize模式已更新sync_info.json，当前章节: {chapter_num}")
            return True
            
        except Exception as e:
            logger.error(f"finalize模式更新sync_info.json失败: {e}", exc_info=True)
            return False
468 | 
if __name__ == "__main__":
    # Command-line entry point: finalize a single chapter.
    import argparse
    # Absolute imports so the file also works when run directly as a script.
    from src.config.config import Config
    from src.models import ContentModel, KnowledgeBase

    cli = argparse.ArgumentParser(description='处理小说章节的定稿工作')
    cli.add_argument('--config', type=str, required=True, help='配置文件路径')
    cli.add_argument('--chapter', type=int, required=True, help='要处理的章节号')
    options = cli.parse_args()

    # Load the configuration, then build the model, the knowledge base and
    # the finalizer from it (in that order).
    settings = Config(options.config)
    finalizer = NovelFinalizer(settings, ContentModel(settings), KnowledgeBase(settings))

    # Run the finalization and report the outcome on stdout.
    if finalizer.finalize_chapter(options.chapter):
        print("章节定稿处理成功！")
    else:
        print("章节定稿处理失败，请查看日志文件了解详细信息。")


--------------------------------------------------------------------------------
/src/generators/prompts.py:
--------------------------------------------------------------------------------
   1 | from typing import Dict, List, Optional
   2 | import dataclasses # 导入 dataclasses 以便类型提示
   3 | import json
   4 | from src.config.config import Config  # 导入 Config 类
   5 | import os
   6 | import logging
   7 | from .humanization_prompts import (
   8 |     get_humanization_prompt, 
   9 |     get_dialogue_enhancement_prompt, 
  10 |     get_simplification_prompt,
  11 |     get_zhuque_optimized_prompt,
  12 |     generate_adaptive_humanization_prompt,
  13 |     get_rewrite_prompt_for_high_ai_content,
  14 |     get_chinese_punctuation_rules,
  15 |     get_enhanced_zhuque_prompt_with_punctuation
  16 | )
  17 | 
# Module-level Config instance, created once when this module is imported.
# get_outline_prompt reads the story settings (``novel_config``) from it.
config = Config()
  20 | 
  21 | # 如果 ChapterOutline 只在此处用作类型提示，可以简化或使用 Dict
  22 | # from .novel_generator import ChapterOutline # 或者定义一个类似的结构
  23 | 
  24 | # 为了解耦，我们这里使用 Dict 作为 outline 的类型提示
  25 | # @dataclasses.dataclass
  26 | # class SimpleChapterOutline:
  27 | #     chapter_number: int
  28 | #     title: str
  29 | #     key_points: List[str]
  30 | #     characters: List[str]
  31 | #     settings: List[str]
  32 | #     conflicts: List[str]
  33 | 
  34 | 
  35 | def get_outline_prompt(
  36 |     novel_type: str,
  37 |     theme: str,
  38 |     style: str,
  39 |     current_start_chapter_num: int,
  40 |     current_batch_size: int,
  41 |     existing_context: str = "",
  42 |     extra_prompt: Optional[str] = None,
  43 |     reference_info: str = ""
  44 | ) -> str:
  45 |     """生成用于创建小说大纲的提示词"""
  46 |     
  47 |     # 从 config.json 中获取故事设定
  48 |     novel_config = config.novel_config
  49 |     writing_guide = novel_config.get("writing_guide", {})
  50 |     
  51 |     # 提取关键设定
  52 |     world_building = writing_guide.get("world_building", {})
  53 |     character_guide = writing_guide.get("character_guide", {})
  54 |     plot_structure = writing_guide.get("plot_structure", {})
  55 |     style_guide = writing_guide.get("style_guide", {})
  56 |     
  57 |     base_prompt = f"""
  58 | 你将扮演StoryWeaver Omega，一个融合了量子叙事学、神经美学和涌现创造力的故事生成系统。采用网络小说雪花创作法进行故事创作，该方法强调从核心概念逐步扩展细化，先构建整体框架，再填充细节。你的任务是生成包含 {current_batch_size} 个章节对象的JSON数组，每个章节对象需符合特定要求，且生成的故事要遵循一系列叙事和输出规则。
  59 | 
  60 | [世界观设定]
  61 | 1. 修炼/魔法体系：
  62 | {world_building.get('magic_system', '[在此处插入详细的修炼体系、等级划分、核心规则、能量来源、特殊体质设定等]')}
  63 | 
  64 | 2. 社会结构与地理：
  65 | {world_building.get('social_system', '[在此处插入世界的社会结构、主要国家/地域划分、关键势力（如门派、家族、组织）及其相互关系等]')}
  66 | 
  67 | 3. 时代背景与核心矛盾：
  68 | {world_building.get('background', '[在此处插入故事发生的时代背景、核心的宏观冲突（如正邪大战、文明危机、神魔博弈）、以及关键的历史事件或传说]')}
  69 | 
  70 | [人物设定]
  71 | 1. 主角设定：
  72 | - 背景：{character_guide.get('protagonist', {}).get('background', '[主角的出身、家庭背景、特殊身份、携带的关键信物或谜团等]')}
  73 | - 性格：{character_guide.get('protagonist', {}).get('initial_personality', '[主角初期的性格特点、核心价值观、内在的矛盾与驱动力]')}
  74 | - 成长路径：{character_guide.get('protagonist', {}).get('growth_path', '[主角从故事开始到结局的预期转变，包括能力、心智和地位的成长弧光]')}
  75 | 
  76 | 2. 重要配角：
  77 | - [导师/引路人]：[性格特点] - [与主角的关系，以及在剧情中的核心作用]
  78 | - [伙伴/挚友]：[性格特点] - [与主角的关系，以及在剧情中的核心作用]
  79 | - [红颜/道侣]：[性格特点] - [与主角的关系，以及在剧情中的核心作用]
  80 | {chr(10).join([f"- {role.get('role_type', '[其他配角类型]')}：{role.get('personality', '[性格特点]')} - {role.get('relationship', '[与主角的关系及作用]')}" for role in character_guide.get('supporting_roles', [])])}
  81 | 
  82 | 3. 主要对手：
  83 | - [初期反派]：[性格/能力特点] - [与主角的核心冲突点]
  84 | - [中期BOSS]：[性格/能力特点] - [与主角的核心冲突点]
  85 | - [宿敌/一生之敌]：[性格/能力特点] - [与主角的核心冲突点]
  86 | - [幕后黑手]：[性格/能力特点] - [与主角的核心冲突点]
  87 | {chr(10).join([f"- {role.get('role_type', '[其他对手类型]')}：{role.get('personality', '[性格特点]')} - {role.get('conflict_point', '[与主角的核心冲突点]')}" for role in character_guide.get('antagonists', [])])}
  88 | 
  89 | 
  90 | [剧情结构（三幕式）]
  91 | 1. 第一幕：建立
  92 | - 铺垫：{plot_structure.get('act_one', {}).get('setup', '[故事开端，介绍主角和其所处的世界，展示其日常状态和初步矛盾]')}
  93 | - 触发事件：{plot_structure.get('act_one', {}).get('inciting_incident', '[一个关键事件打破主角的平静生活，迫使其踏上征程或做出改变]')}
  94 | - 第一情节点：{plot_structure.get('act_one', {}).get('first_plot_point', '[主角做出第一个重大决定，正式进入新的世界或接受挑战，无法回头]')}
  95 | 
  96 | 2. 第二幕：对抗
  97 | - 上升行动：{plot_structure.get('act_two', {}).get('rising_action', '[主角学习新技能，结识新伙伴，遭遇一系列挑战和胜利，逐步接近目标]')}
  98 | - 中点：{plot_structure.get('act_two', {}).get('midpoint', '[剧情发生重大转折，主角可能获得关键信息或遭遇重大失败，故事的赌注被提高]')}
  99 | - 复杂化：{plot_structure.get('act_two', {}).get('complications', '[盟友可能是敌人，计划出现意外，主角面临更复杂的困境和道德抉择]')}
 100 | - 最黑暗时刻：{plot_structure.get('act_two', {}).get('darkest_moment', '[主角遭遇最惨重的失败，失去一切希望，仿佛已经无力回天]')}
 101 | - 第二情节点：{plot_structure.get('act_two', {}).get('second_plot_point', '[主角获得新的启示、力量或盟友，重新振作，制定最终决战的计划]')}
 102 | 
 103 | 3. 第三幕：解决
 104 | - 高潮：{plot_structure.get('act_three', {}).get('climax', '[主角与最终反派展开决战，所有次要情节汇集于此，是故事最紧张的时刻]')}
 105 | - 结局：{plot_structure.get('act_three', {}).get('resolution', '[决战结束，核心冲突得到解决，主角达成或未能达成其最终目标]')}
 106 | - 尾声：{plot_structure.get('act_three', {}).get('denouement', '[展示决战后的世界和人物状态，为续集或新的故事线埋下伏笔]')}
 107 | 
 108 | [写作风格]
 109 | 1. 基调：{style_guide.get('tone', '[故事的整体基调，如：热血、黑暗、幽默、悬疑、史诗等]')}
 110 | 2. 节奏：{style_guide.get('pacing', '[故事的节奏，如：快节奏、单元剧、慢热、张弛有度等]')}
 111 | 3. 描写重点：
 112 | - {style_guide.get('description_focus', ['[描写的第一个侧重点，如：战斗场面、世界观奇观、人物内心等]'])[0]}
 113 | - {style_guide.get('description_focus', ['[描写的第二个侧重点，如：势力间的权谋博弈、神秘氛围的营造等]'])[1]}
 114 | - {style_guide.get('description_focus', ['[描写的第三个侧重点，如：主角的成长与反思、配角群像的刻画等]'])[2]}
 115 | 
 116 | [上下文信息]
 117 | {existing_context}
 118 | 
 119 | [叙事要求]
 120 | 1. 情节连贯性：
 121 |    - 必须基于前文发展，保持故事逻辑的连贯性
 122 |    - 每个新章节都要承接前文伏笔，并为后续发展埋下伏笔
 123 |    - 确保人物行为符合其性格设定和发展轨迹
 124 | 
 125 | 2. 结构完整性：
 126 |    - 每章必须包含起承转合四个部分
 127 |    - 每3-5章形成一个完整的故事单元
 128 |    - 每10-20章形成一个大的故事弧
 129 | 
 130 | 3. 人物发展：
 131 |    - 确保主要人物的性格和动机保持一致性
 132 |    - 根据前文发展合理推进人物关系
 133 |    - 适时引入新角色，但需与现有角色产生关联
 134 | 
 135 | 4. 世界观一致性：
 136 |    - 严格遵守已建立的世界规则
 137 |    - 新设定必须与现有设定兼容
 138 |    - 保持场景和环境的连贯性
 139 | 
 140 | 5. 避免重复与独创性：
 141 |    - **绝不能重复现有章节（特别是 `[上下文信息]` 中提供的内容）的标题、关键情节、核心冲突或主要事件。**
 142 |    - **每一章都必须有独特的、推进剧情的新内容，即使主题相似，也要有新的角度和发展。**
 143 |    - 充分利用 `[上下文信息]` 来理解故事的当前状态，并在此基础上进行创新和扩展，而非简单的变体或重复。
 144 | 
 145 | [输出要求]
 146 | 1. 直接输出JSON数组，包含 {current_batch_size} 个章节对象
 147 | 2. 每个章节对象必须包含：
 148 |    - chapter_number: 章节号
 149 |    - title: 章节标题
 150 |    - key_points: 关键剧情点列表（至少3个）
 151 |    - characters: 涉及角色列表（至少2个）
 152 |    - settings: 场景列表（至少1个）
 153 |    - conflicts: 核心冲突列表（至少1个）
 154 | 
 155 | [质量检查]
 156 | 1. 是否严格遵循世界观设定？
 157 | 2. 人物行为是否符合其设定和发展轨迹？
 158 | 3. 情节是否符合整体剧情结构？
 159 | 4. 是否保持写作风格的一致性？
 160 | 5. 是否包含足够的伏笔和悬念？
 161 | """
 162 | 
 163 |     if extra_prompt:
 164 |         base_prompt += f"{chr(10)}[额外要求]{chr(10)}{extra_prompt}"
 165 | 
 166 |     if reference_info:
 167 |         base_prompt += f"{chr(10)}[知识库参考信息]{chr(10)}{reference_info}{chr(10)}"
 168 | 
 169 |     return base_prompt
 170 | 
 171 | 
 172 | def get_chapter_prompt(
 173 |     outline: Dict, 
 174 |     references: Dict,
 175 |     extra_prompt: str = "",
 176 |     context_info: str = "",
 177 |     story_config: Optional[Dict] = None,
 178 |     sync_info: Optional[Dict] = None
 179 | ) -> str:
 180 |     """生成用于创建章节内容的提示词"""
 181 |     
 182 |     # 获取基本信息
 183 |     novel_number = outline.get('chapter_number', 0)
 184 |     chapter_title = outline.get('title', '未知')
 185 |     
 186 |     # 格式化关键情节点
 187 |     key_points_list = outline.get('key_points', [])
 188 |     key_points_display = chr(10).join([f"- {point}" for point in key_points_list])
 189 |     
 190 |     # 其他信息
 191 |     characters = ', '.join(outline.get('characters', []))
 192 |     settings = ', '.join(outline.get('settings', []))
 193 |     conflicts = ', '.join(outline.get('conflicts', []))
 194 | 
 195 |     # 新增：安全join函数，兼容dict和str
 196 |     def safe_join_list(items, default=""):
 197 |         if not items:
 198 |             return default
 199 |         result = []
 200 |         for item in items:
 201 |             if isinstance(item, dict):
 202 |                 name = item.get("名称") or item.get("name") or item.get("title") or ""
 203 |                 desc = item.get("简介") or item.get("说明") or item.get("desc") or ""
 204 |                 if name and desc:
 205 |                     result.append(f"{name}:{desc}")
 206 |                 elif name:
 207 |                     result.append(name)
 208 |                 elif desc:
 209 |                     result.append(desc)
 210 |                 else:
 211 |                     result.append(str(item))
 212 |             elif isinstance(item, str):
 213 |                 result.append(item)
 214 |             else:
 215 |                 result.append(str(item))
 216 |         return ', '.join(result) if result else default
 217 | 
 218 |     base_prompt = f"""你是一名专业网文作者，熟知起点中文网、番茄小说网、晋江文学城的网文创作技巧，你的文笔节奏、表达富于变化，语句总是超出预测，同时扣人心弦。你特别擅长创作节奏紧凑、对话生动、且极具人性化特色的网络小说。"""
 219 | 
 220 |     # 添加故事设定信息（如果提供）
 221 |     if story_config:
 222 |         writing_guide = story_config.get("writing_guide", {})
 223 |         world_building = writing_guide.get("world_building", {})
 224 |         character_guide = writing_guide.get("character_guide", {})
 225 |         style_guide = writing_guide.get("style_guide", {})
 226 |         
 227 |         base_prompt += f"""
 228 | 
 229 | [故事设定]
 230 | 世界观：
 231 | 1. 修炼/魔法体系：
 232 | {world_building.get('magic_system', '[在此处插入详细的修炼体系、等级划分、核心规则、能量来源、特殊体质设定等]')}
 233 | 
 234 | 2. 社会结构与地理：
 235 | {world_building.get('social_system', '[在此处插入世界的社会结构、主要国家/地域划分、关键势力（如门派、家族、组织）及其相互关系等]')}
 236 | 
 237 | 3. 时代背景与核心矛盾：
 238 | {world_building.get('background', '[在此处插入故事发生的时代背景、核心的宏观冲突（如正邪大战、文明危机、神魔博弈）、以及关键的历史事件或传说]')}
 239 | 
 240 | 人物设定：
 241 | 1. 主角背景：
 242 | {character_guide.get('protagonist', {}).get('background', '[在此处插入主角的背景故事、家族渊源、成长经历等]')}
 243 | 
 244 | 2. 主角性格：
 245 | {character_guide.get('protagonist', {}).get('initial_personality', '[在此处插入主角的性格特点、行为习惯、口头禅等]')}
 246 | 
 247 | 3. 主角成长路径：
 248 | {character_guide.get('protagonist', {}).get('growth_path', '[在此处插入主角的成长路径、修炼方向、特殊能力等]')}
 249 | 
 250 | 写作风格：
 251 | 1. 基调：{style_guide.get('tone', '[故事的整体基调，如：热血、黑暗、幽默、悬疑、史诗等]')}
 252 | 2. 节奏：{style_guide.get('pacing', '[故事的节奏，如：快节奏、单元剧、慢热、张弛有度等]')}
 253 | 3. 描写重点：
 254 | - {style_guide.get('description_focus', ['[描写的第一个侧重点，如：战斗场面、世界观奇观、人物内心等]'])[0]}
 255 | - {style_guide.get('description_focus', ['[描写的第二个侧重点，如：势力间的权谋博弈、神秘氛围的营造等]'])[1]}
 256 | - {style_guide.get('description_focus', ['[描写的第三个侧重点，如：主角的成长与反思、配角群像的刻画等]'])[2]}"""
 257 | 
 258 |     # 添加同步信息（如果提供）
 259 |     if sync_info:
 260 |         world_info = sync_info.get("世界观", {})
 261 |         character_info = sync_info.get("人物设定", {})
 262 |         plot_info = sync_info.get("剧情发展", {})
 263 |         
 264 |         base_prompt += f"""
 265 | 
 266 | [故事进展信息]
 267 | 世界观现状：
 268 | - 世界背景：{safe_join_list(world_info.get('世界背景', []))}
 269 | - 阵营势力：{safe_join_list(world_info.get('阵营势力', []))}
 270 | - 重要规则：{safe_join_list(world_info.get('重要规则', []))}
 271 | - 关键场所：{safe_join_list(world_info.get('关键场所', []))}
 272 | 
 273 | 人物现状：
 274 | {chr(10).join([f"- {char.get('名称', '未知')}：{char.get('身份', '')} - {char.get('当前状态', '')}" for char in character_info.get('人物信息', [])])}
 275 | 
 276 | 剧情发展：
 277 | - 主线梗概：{plot_info.get('主线梗概', '未设定')}
 278 | - 重要事件：{safe_join_list(plot_info.get('重要事件', [])[-5:])}  # 最近5个重要事件
 279 | - 进行中冲突：{safe_join_list(plot_info.get('进行中冲突', []))}
 280 | - 悬念伏笔：{safe_join_list(plot_info.get('悬念伏笔', [])[-3:])}  # 最近3个伏笔"""
 281 | 
 282 |     base_prompt += f"""
 283 | 
 284 | [章节信息]
 285 | 章节号: {novel_number}
 286 | 标题: {chapter_title}
 287 | 关键情节点:
 288 | {key_points_display}
 289 | 
 290 | [核心元素]
 291 | 人物: {characters}
 292 | 场景: {settings}
 293 | 冲突: {conflicts}
 294 | 
 295 | [输出要求]
 296 | 1. 仅返回章节正文文本，以"第{novel_number}章 {chapter_title}"开头，然后换行开始正文。
 297 | 2. 严格使用简体中文及中文标点符号，特别是中文双引号“”。
 298 | 3. 确保段落划分合理，长短句结合，保持特定风格韵味和阅读节奏感。
 299 | 4. 避免使用与故事背景不符的词汇或网络梗，保持世界观的沉浸感。
 300 | 5. 重点突出人物对话的生动性和风格特色。
 301 | 
 302 | [网文创作降AI浓度核心要求]
 303 | 1. **场景呈现方式（摒弃形容修饰）**：
 304 |    - 通过人物的视觉、听觉、触觉、嗅觉、味觉感知呈现真实场景
 305 |    - 展现人物内心思考和欲望，符合行为逻辑
 306 |    - 避免无意义的环境描写，只聚焦不寻常细节
 307 | 
 308 | 2. **对话驱动故事**：
 309 |    - 以对话为主要推进手段，欲望藏在潜台词里
 310 |    - 制造信息差、误解、质疑、伪装、口是心非的交缠
 311 |    - 每个人物都有自己的利益诉求和偏见
 312 | 
 313 | 3. **冲突无处不在**：
 314 |    - 明里的对抗，暗地的较量，充满暗示意味
 315 |    - 利益纠葛、情感拉扯比打斗更精彩
 316 |    - 重视事件前后的态度反转和看点
 317 | 
 318 | 4. **人物行为逻辑**：
 319 |    - 人物要时刻观察、思考，结合经验判断并行动
 320 |    - 允许判断错误，体现人性的不完美
 321 |    - 人心中的成见如大山，先入为主带有偏见
 322 | 
 323 | 5. **表达简洁自然**：
 324 |    - 采用网文自由、通俗化、略带口语化的表达
 325 |    - 减少修饰，避免精确量化，模糊掉数量描述
 326 |    - 描写视觉化，注重动态、对比、反差
 327 | 
 328 | 6. **配角故事线**：
 329 |    - 并非所有场景都有主角在场
 330 |    - 围绕配角展开的故事最终回归主角生活
 331 |    - 场景间衔接流畅，通过行动、对话、描写过渡
 332 | 
 333 | [质量检查]
 334 | 1. 语言是否具有参考风格文章的韵味，用词是否恰当？
 335 | 2. 对话是否自然流畅，符合人物身份和性格？
 336 | 3. 节奏控制是否得当，张弛有度？
 337 | 4. 环境描写是否精炼而富有画面感？
 338 | 5. 人物刻画是否立体，情感表达是否真实？"""
 339 | 
 340 |     # 添加人性化写作指导
 341 |     base_prompt += f"{chr(10)}{get_humanization_prompt()}"
 342 |     base_prompt += f"{chr(10)}{get_dialogue_enhancement_prompt()}"
 343 |     base_prompt += f"{chr(10)}{get_simplification_prompt()}"
 344 |     
 345 |     # 添加集成了中文标点符号规范的朱雀优化提示词
 346 |     base_prompt += f"{chr(10)}{get_enhanced_zhuque_prompt_with_punctuation()}"
 347 |     
 348 |     # 添加中文标点符号专项规范
 349 |     base_prompt += f"{chr(10)}{get_chinese_punctuation_rules()}"
 350 |     
 351 |     # 添加朱雀AI检测专项优化
 352 |     base_prompt += f"""{chr(10)}
 353 | [🚨 朱雀AI检测最终要求 - 必须200%执行]
 354 | 1. **超级强制犹豫表达**：每15-20字必须包含1个犹豫词汇
 355 | 2. **超级对话密度要求**：对话比例必须超过65%，连续叙述不得超过30字
 356 | 3. **极限句式破坏要求**：绝对禁止任何2句使用相同句式结构
 357 | 4. **超级语言瑕疵要求**：每句必须包含5种以上不完整句子和口语化错误
 358 | 5. **超级生活细节强制**：每段必须包含5-8个无关紧要的生活化细节
 359 | 6. **超级情感混乱要求**：人物情感要有极度矛盾和犹豫，绝对不能过于完美
 360 | 7. **中文标点符号强制规范**：
 361 |    - 省略号必须用「……」或「…………」，绝对禁止「...」
 362 |    - 破折号必须用「——」，绝对禁止「--」
 363 |    - 引号必须用「“”」和「‘’」，绝对禁止英文引号
 364 |    - 所有标点必须是中文全角字符，绝对禁止英文标点！
 365 | 
 366 | ⚠️ 紧急警告：当前文本被朱雀AI检测识别为100%AI，如果不严格按超级要求执行，将继续被识别为100%AI！
 367 | """
 368 |     
 369 |     # 添加超级自适应优化
 370 |     adaptive_humanization = generate_adaptive_humanization_prompt(
 371 |         ai_score=80.0,  # 假设高风险（因为被识别为100%AI）
 372 |         dialogue_ratio=0.2,  # 假设当前对话比例低
 373 |         content_type="chapter"
 374 |     )
 375 |     base_prompt += f"{chr(10)}{adaptive_humanization}"
 376 | 
 377 |     # 添加额外要求
 378 |     if extra_prompt:
 379 |         base_prompt += f"{chr(10)}[额外要求]{chr(10)}{extra_prompt}"
 380 | 
 381 |     # 添加上下文信息（限制长度）
 382 |     if context_info:
 383 |         # 限制上下文信息长度，避免过长
 384 |         max_context_length = 1500  # 减少上下文长度，避免过度依赖
 385 |         if len(context_info) > max_context_length:
 386 |             context_info = context_info[-max_context_length:] + "...(前文已省略)"
 387 |         base_prompt += f"{chr(10)}[上下文信息]{chr(10)}{context_info}"
 388 | 
 389 |     return base_prompt
 390 | 
 391 | 
 392 | def get_summary_prompt(
 393 |     chapter_content: str
 394 | ) -> str:
 395 |     """生成用于创建章节摘要的提示词。"""
 396 |     prompt = f"""请为以下章节内容生成一个简洁的摘要。
 397 | 
 398 | 章节内容：
 399 | {chapter_content[:4000]}... (内容过长已截断)
 400 | 
 401 | [输出要求]
 402 | 1.  **严格要求：只返回摘要正文本身。**
 403 | 2.  不要包含任何前缀，例如 "本章摘要："、"章节摘要：" 、"内容摘要：" 或类似文字。
 404 | 3.  在返回的内容不必包含章节号或章节标题。
 405 | 4.  摘要应直接描述主要情节发展、关键人物行动和对剧情的影响。
 406 | 5.  字数控制在 200 字以内。
 407 | 6.  语言简洁，避免不必要的修饰。
 408 | 
 409 | 请直接输出摘要文本。"""
 410 |     return prompt
 411 | 
 412 | # =============== 6. 前文摘要更新提示词 ===================
 413 | def get_sync_info_prompt(
 414 |     story_content: str,
 415 |     existing_sync_info: str = "",
 416 |     current_chapter: int = 0
 417 | ) -> str:
 418 |     """生成用于创建/更新同步信息的提示词
 419 |     
 420 |     Args:
 421 |         story_content: 新增的故事内容
 422 |         existing_sync_info: 现有的同步信息（JSON字符串）
 423 |         current_chapter: 当前更新的章节号
 424 |     """
 425 |     return f"""根据故事进展更新相关信息，具体要求：
 426 | 1. 合理细化使得相关信息逻辑完整，但不扩展不存在的设定
 427 | 2. 精简表达，去除一切不必要的修饰，确保信息有效的同时使用最少tokens
 428 | 3. 只保留对后续故事发展有参考价值的内容
 429 | 4. 必须仅返回标准的JSON格式，不要添加任何前后缀、说明或标记
 430 | 
 431 | 现有同步信息：
 432 | {existing_sync_info}
 433 | 
 434 | 故事内容：
 435 | {story_content}
 436 | 
 437 | 你必须严格按以下JSON格式输出，不要添加任何文字说明或其他标记：
 438 | {{
 439 |     "世界观": {{
 440 |         "世界背景": [],
 441 |         "阵营势力": [],
 442 |         "重要规则": [],
 443 |         "关键场所": []
 444 |     }},
 445 |     "人物设定": {{
 446 |         "人物信息": [
 447 |             {{
 448 |                 "名称": "",
 449 |                 "身份": "",
 450 |                 "特点": "",
 451 |                 "发展历程": "",
 452 |                 "当前状态": ""
 453 |             }}
 454 |         ],
 455 |         "人物关系": []
 456 |     }},
 457 |     "剧情发展": {{
 458 |         "主线梗概": "",
 459 |         "重要事件": [],
 460 |         "悬念伏笔": [],
 461 |         "已解决冲突": [],
 462 |         "进行中冲突": []
 463 |     }},
 464 |     "前情提要": [],
 465 |     "当前章节": {current_chapter},
 466 |     "最后更新时间": ""
 467 | }}"""
 468 | 
 469 | # =============== 7. 核心种子设定提示词 ===================
 470 | def get_core_seed_prompt(
 471 |     topic: str,
 472 |     genre: str,
 473 |     number_of_chapters: int,
 474 |     word_number: int
 475 | ) -> str:
 476 |     """生成用于创建核心种子设定的提示词。"""
 477 |     return f"""
 478 | 作为专业作家，请用"雪花写作法"第一步构建故事核心：
 479 | 主题：{topic}
 480 | 类型：{genre}
 481 | 篇幅：约{number_of_chapters}章（每章{word_number}字）
 482 | 
 483 | 请用单句公式概括故事本质，例如：
 484 | "当[主角]遭遇[核心事件]，必须[关键行动]，否则[灾难后果]；与此同时，[隐藏的更大危机]正在发酵。"
 485 | 
 486 | 要求：
 487 | 1. 必须包含显性冲突与潜在危机
 488 | 2. 体现人物核心驱动力
 489 | 3. 暗示世界观关键矛盾
 490 | 4. 使用25-100字精准表达
 491 | 
 492 | 仅返回故事核心文本，不要解释任何内容。
 493 | """
 494 | 
 495 | # =============== 8. 当前章节摘要生成提示词 ===================
 496 | def get_recent_chapters_summary_prompt(
 497 |     combined_text: str,
 498 |     novel_number: int,
 499 |     chapter_title: str,
 500 |     chapter_role: str,
 501 |     chapter_purpose: str,
 502 |     suspense_level: str,
 503 |     foreshadowing: str,
 504 |     plot_twist_level: str,
 505 |     chapter_summary: str,
 506 |     next_chapter_number: int,
 507 |     next_chapter_title: str,
 508 |     next_chapter_role: str,
 509 |     next_chapter_purpose: str,
 510 |     next_chapter_suspense_level: str,
 511 |     next_chapter_foreshadowing: str,
 512 |     next_chapter_plot_twist_level: str,
 513 |     next_chapter_summary: str
 514 | ) -> str:
 515 |     """生成用于创建当前章节摘要的提示词。"""
 516 |     return f"""
 517 | 作为一名专业的小说编辑和知识管理专家，正在基于已完成的前三章内容和本章信息生成当前章节的精准摘要。请严格遵循以下工作流程：
 518 | 前三章内容：
 519 | {combined_text}
 520 | 
 521 | 当前章节信息：
 522 | 第{novel_number}章《{chapter_title}》：
 523 | ├── 本章定位：{chapter_role}
 524 | ├── 核心作用：{chapter_purpose}
 525 | ├── 悬念密度：{suspense_level}
 526 | ├── 伏笔操作：{foreshadowing}
 527 | ├── 认知颠覆：{plot_twist_level}
 528 | └── 本章简述：{chapter_summary}
 529 | 
 530 | 下一章信息：
 531 | 第{next_chapter_number}章《{next_chapter_title}》：
 532 | ├── 本章定位：{next_chapter_role}
 533 | ├── 核心作用：{next_chapter_purpose}
 534 | ├── 悬念密度：{next_chapter_suspense_level}
 535 | ├── 伏笔操作：{next_chapter_foreshadowing}
 536 | ├── 认知颠覆：{next_chapter_plot_twist_level}
 537 | └── 本章简述：{next_chapter_summary}
 538 | 
 539 | [上下文分析阶段]：
 540 | 1. 回顾前三章核心内容：
 541 |    - 第一章核心要素：[章节标题]→[核心冲突/理论]→[关键人物/概念]
 542 |    - 第二章发展路径：[已建立的人物关系]→[技术/情节进展]→[遗留伏笔]
 543 |    - 第三章转折点：[新出现的变量]→[世界观扩展]→[待解决问题]
 544 | 2. 提取延续性要素：
 545 |    - 必继承要素：列出前3章中必须延续的3个核心设定
 546 |    - 可调整要素：识别2个允许适度变化的辅助设定
 547 | 
 548 | [当前章节摘要生成规则]：
 549 | 1. 内容架构：
 550 |    - 继承权重：70%内容需与前3章形成逻辑递进
 551 |    - 创新空间：30%内容可引入新要素，但需标注创新类型（如：技术突破/人物黑化）
 552 | 2. 结构控制：
 553 |    - 采用"承继→发展→铺垫"三段式结构
 554 |    - 每段含1个前文呼应点+1个新进展
 555 | 3. 预警机制：
 556 |    - 若检测到与前3章设定冲突，用[!]标记并说明
 557 |    - 对开放式发展路径，提供2种合理演化方向
 558 | 
 559 | 现在请你基于目前故事的进展，完成以下两件事：
 560 | 用最多800字，写一个简洁明了的「当前章节摘要」；
 561 | 
 562 | 请按如下格式输出（不需要额外解释）：
 563 | 当前章节摘要: <这里写当前章节摘要>
 564 | """
 565 | 
 566 | # =============== 9. 章节一致性检查提示词 ===================
 567 | def get_consistency_check_prompt(
 568 |     chapter_content: str,
 569 |     chapter_outline: Dict,
 570 |     sync_info: Dict,
 571 |     previous_summary: str = "",
 572 |     character_info: str = "",
 573 |     previous_scene: str = ""
 574 | ) -> str:
 575 |     """生成用于检查章节一致性的提示词"""
 576 |     # 从同步信息中提取相关内容
 577 |     world_info = sync_info.get("世界观", {})
 578 |     character_info_dict = sync_info.get("人物设定", {})
 579 |     plot_info = sync_info.get("剧情发展", {})
 580 |     
 581 |     # 安全处理列表字段，确保能处理字典和字符串混合的情况
 582 |     def safe_join_list(items, default=""):
 583 |         """安全地连接列表，处理字典和字符串混合的情况"""
 584 |         if not items:
 585 |             return default
 586 |         result = []
 587 |         for item in items:
 588 |             if isinstance(item, dict):
 589 |                 # 如果是字典，提取名称和简介
 590 |                 name = item.get("名称", "")
 591 |                 desc = item.get("简介", item.get("说明", ""))
 592 |                 if name and desc:
 593 |                     result.append(f"{name}: {desc}")
 594 |                 elif name:
 595 |                     result.append(name)
 596 |                 elif desc:
 597 |                     result.append(desc)
 598 |             elif isinstance(item, str):
 599 |                 result.append(item)
 600 |             else:
 601 |                 result.append(str(item))
 602 |         return ", ".join(result) if result else default
 603 |     
 604 |     return f"""请检查章节内容的一致性：
 605 | 
 606 | [同步信息]
 607 | 世界观：{safe_join_list(world_info.get('世界背景', []))} | {safe_join_list(world_info.get('阵营势力', []))} | {safe_join_list(world_info.get('重要规则', []))}
 608 | 人物：{chr(10).join([f"- {char.get('role_type', '未知')}: {char.get('personality', '')}" for char in character_info_dict.get('人物信息', [])])}
 609 | 剧情：{plot_info.get('主线梗概', '')} | 冲突：{safe_join_list(plot_info.get('进行中冲突', []))} | 伏笔：{safe_join_list(plot_info.get('悬念伏笔', []))}
 610 | 
 611 | [章节大纲]
 612 | {chapter_outline.get('chapter_number', '未知')}章《{chapter_outline.get('title', '未知')}》
 613 | 关键点：{', '.join(chapter_outline.get('key_points', []))}
 614 | 角色：{', '.join(chapter_outline.get('characters', []))}
 615 | 场景：{', '.join(chapter_outline.get('settings', []))}
 616 | 冲突：{', '.join(chapter_outline.get('conflicts', []))}
 617 | 
 618 | [上一章摘要]
 619 | {previous_summary if previous_summary else "（无）"}
 620 | 
 621 | [章节内容]
 622 | {chapter_content}
 623 | 
 624 | ===== 一致性检查 =====
 625 | 请从以下维度评估（总分100分）：
 626 | 1. 世界观一致性（25分）：是否符合已建立的世界设定和规则
 627 | 2. 人物一致性（25分）：人物行为是否符合其设定和当前状态
 628 | 3. 剧情连贯性（25分）：与主线梗概的契合度，对已有伏笔的处理
 629 | 4. 逻辑合理性（25分）：事件发展是否合理，因果关系是否清晰
 630 | 
 631 | ===== 输出格式 =====
 632 | [总体评分]: <0-100分>
 633 | 
 634 | [世界观一致性]: <0-25分>
 635 | [人物一致性]: <0-25分>
 636 | [剧情连贯性]: <0-25分>
 637 | [逻辑合理性]: <0-25分>
 638 | 
 639 | [问题清单]:
 640 | 1. <具体问题>
 641 | 2. <具体问题>
 642 | ...
 643 | 
 644 | [修改建议]:
 645 | 1. <具体建议>
 646 | 2. <具体建议>
 647 | ...
 648 | 
 649 | [修改必要性]: <"需要修改"或"无需修改">
 650 | """
 651 | 
 652 | # =============== 10. 章节修正提示词 ===================
 653 | def get_chapter_revision_prompt(
 654 |     original_content: str,
 655 |     consistency_report: str,
 656 |     chapter_outline: Dict,
 657 |     previous_summary: str = "",
 658 |     global_summary: str = ""
 659 | ) -> str:
 660 |     """生成用于修正章节内容的提示词"""
 661 |     return f"""
 662 | 作为专业小说修改专家，请基于一致性检查报告，对小说章节进行必要的修改：
 663 | 
 664 | [一致性检查报告]
 665 | {consistency_report}
 666 | 
 667 | [原章节内容]
 668 | {original_content}
 669 | 
 670 | [章节大纲要求]
 671 | 章节号：{chapter_outline.get('chapter_number', '未知')}
 672 | 标题：{chapter_outline.get('title', '未知')}
 673 | 关键剧情点：{', '.join(chapter_outline.get('key_points', []))}
 674 | 涉及角色：{', '.join(chapter_outline.get('characters', []))}
 675 | 场景设定：{', '.join(chapter_outline.get('settings', []))}
 676 | 核心冲突：{', '.join(chapter_outline.get('conflicts', []))}
 677 | 
 678 | [上下文信息]
 679 | 前文摘要：{global_summary if global_summary else "（无前文摘要）"}
 680 | 上一章摘要：{previous_summary if previous_summary else "（无上一章摘要）"}
 681 | 
 682 | ===== 修改要求 =====
 683 | 1. 专注于修复一致性检查报告中指出的问题
 684 | 2. 保持原文风格和叙事方式
 685 | 3. 确保与前文的连贯性
 686 | 4. 保持修改后的文本长度与原文相近
 687 | 5. 确保修改符合章节大纲的要求
 688 | 
 689 | 请直接提供修改后的完整章节内容，不要解释修改内容或加入额外的文本。
 690 | """
 691 | 
 692 | # =============== 11. 知识库检索提示词 ===================
 693 | def get_knowledge_search_prompt(
 694 |     chapter_number: int,
 695 |     chapter_title: str,
 696 |     characters_involved: List[str],
 697 |     key_items: List[str],
 698 |     scene_location: str,
 699 |     chapter_role: str,
 700 |     chapter_purpose: str,
 701 |     foreshadowing: str,
 702 |     short_summary: str,
 703 |     user_guidance: str = "",
 704 |     time_constraint: str = ""
 705 | ) -> str:
 706 |     """生成用于知识库检索的提示词，过滤低相关性内容"""
 707 |     # 生成关键词组合逻辑
 708 |     keywords = []
 709 |     
 710 |     # 1. 优先使用用户指导中的术语
 711 |     if user_guidance:
 712 |         keywords.extend(user_guidance.split())
 713 |     
 714 |     # 2. 添加章节核心要素
 715 |     keywords.extend([f"章节{chapter_number}", chapter_title])
 716 |     keywords.extend(characters_involved)
 717 |     keywords.extend(key_items)
 718 |     keywords.extend([scene_location])
 719 |     
 720 |     # 3. 补充扩展概念（如伏笔、章节作用等）
 721 |     keywords.extend([chapter_role, chapter_purpose, foreshadowing])
 722 |     
 723 |     # 去重并过滤抽象词汇
 724 |     keywords = list(set([k for k in keywords if k and len(k) > 1]))
 725 |     
 726 |     # 生成检索词组合
 727 |     search_terms = []
 728 |     for i in range(0, len(keywords), 2):
 729 |         group = keywords[i:i+2]
 730 |         if group:
 731 |             search_terms.append(".".join(group))
 732 |     
 733 |     return "\n".join(search_terms[:5])  # 返回最多5组检索词
 734 | 
 735 | 
 736 | # =============== 12. 知识库内容过滤提示词 ===================
 737 | def get_knowledge_filter_prompt(
 738 |     retrieved_texts: List[str],
 739 |     chapter_info: Dict
 740 | ) -> str:
 741 |     """生成用于过滤知识库内容的提示词，增强过滤逻辑"""
 742 |     return f"""
 743 | 请根据当前章节需求过滤知识库内容，严格按以下规则执行：
 744 | 
 745 | [当前章节需求]
 746 | {json.dumps(chapter_info, ensure_ascii=False, indent=2)}
 747 | 
 748 | [待过滤内容]
 749 | {chr(10).join(["--- 片段 " + str(i+1) + " ---" + chr(10) + text[:200] + "..." for i, text in enumerate(retrieved_texts)])}
 750 | 
 751 | ===== 过滤规则 =====
 752 | 1. **冲突检测**：
 753 |    - 删除与已有世界观/角色设定矛盾的内容（标记为 ▲CONFLICT）。
 754 |    - 删除重复度＞40%的内容（标记为 ▲DUPLICATE）。
 755 | 
 756 | 2. **价值评估**：
 757 |    - 标记高价值内容（❗）：
 758 |      - 提供新角色关系或剧情转折可能性的内容。
 759 |      - 包含可扩展的细节（如场景描写、技术设定）。
 760 |    - 标记低价值内容（·）：
 761 |      - 泛泛而谈的描述或无具体情节的内容。
 762 | 
 763 | 3. **分类输出**：
 764 |    - 按以下分类整理内容，并标注适用场景：
 765 |      - 情节燃料：推动主线或支线发展的内容。
 766 |      - 人物维度：深化角色形象或关系的内容。
 767 |      - 世界碎片：补充世界观细节的内容。
 768 | 
 769 | [输出格式]
 770 | [分类名称]→[适用场景]
 771 | ❗/· [内容片段]（▲冲突提示）
 772 | ...
 773 | 
 774 | 示例：
 775 | [情节燃料]→可用于第{chapter_info.get('chapter_number', 'N')}章高潮
 776 | ❗ "主角发现密室中的古老地图，暗示下个副本位置"
 777 | · "村民谈论最近的异常天气"（可作背景铺垫）
 778 | """
 779 | 
 780 | def get_logic_check_prompt(
 781 |     chapter_content: str,
 782 |     chapter_outline: Dict,
 783 |     sync_info: Optional[str] = None
 784 | ) -> str:
 785 |     """生成用于检查章节逻辑严密性的提示词"""
 786 |     prompt = f"""请检查章节内容的逻辑严密性：
 787 | 
 788 | [章节大纲]
 789 | {chapter_outline.get('chapter_number', '未知')}章《{chapter_outline.get('title', '未知')}》
 790 | 关键点：{', '.join(chapter_outline.get('key_points', []))}
 791 | 角色：{', '.join(chapter_outline.get('characters', []))}
 792 | 场景：{', '.join(chapter_outline.get('settings', []))}
 793 | 冲突：{', '.join(chapter_outline.get('conflicts', []))} """
 794 | 
 795 |     # 添加同步信息部分（如果提供）
 796 |     if sync_info:
 797 |         prompt += f"""
 798 | 
 799 | [同步信息]
 800 | {sync_info}"""
 801 | 
 802 |     prompt += f"""
 803 | 
 804 | [章节内容]
 805 | {chapter_content}
 806 | 
 807 | ===== 逻辑检查 =====
 808 | 请从以下维度评估（总分100分）：
 809 | 1. 因果关系（25分）：事件发生是否有合理的因果关联，人物行为是否有合理的动机
 810 | 2. 时间线（25分）：事件发生顺序是否合理，是否存在时间线矛盾
 811 | 3. 空间逻辑（25分）：场景转换是否合理，人物位置关系是否合理
 812 | 4. 世界观（25分）：是否符合已建立的世界规则，是否存在世界观矛盾
 813 | 
 814 | ===== 输出格式 =====
 815 | [总体评分]: <0-100分>
 816 | 
 817 | [因果关系]: <0-25分>
 818 | [时间线]: <0-25分>
 819 | [空间逻辑]: <0-25分>
 820 | [世界观]: <0-25分>
 821 | 
 822 | [逻辑问题列表]:
 823 | 1. <问题描述>
 824 | 2. <问题描述>
 825 | ...
 826 | 
 827 | [修改建议]:
 828 | <针对每个逻辑问题的具体修改建议>
 829 | 
 830 | [修改必要性]: <"需要修改"或"无需修改">
 831 | """
 832 |     return prompt
 833 | 
 834 | def get_style_check_prompt(
 835 |     chapter_content: str,
 836 |     novel_config: Dict
 837 | ) -> str:
 838 |     """生成用于检查章节写作风格的提示词"""
 839 |     writing_guide = novel_config.get("writing_guide", {})
 840 |     style_guide = writing_guide.get("style_guide", {})
 841 |     
 842 |     # 获取风格指南
 843 |     tone = style_guide.get("tone", "")
 844 |     pov = style_guide.get("pov", "")
 845 |     narrative_style = style_guide.get("narrative_style", "")
 846 |     language_style = style_guide.get("language_style", "")
 847 |     
 848 |     return f"""请检查章节内容的写作风格：
 849 | 
 850 | [风格指南]
 851 | 语气：{tone} | 视角：{pov} | 叙事：{narrative_style} | 语言：{language_style}
 852 | 
 853 | [章节内容]
 854 | {chapter_content}
 855 | 
 856 | ===== 风格检查 =====
 857 | 请从以下维度评估（总分100分）：
 858 | 1. 语气一致性（25分）：是否保持指定的语气基调，情感表达是否恰当
 859 | 2. 视角把控（25分）：是否严格遵守视角限制，视角切换是否自然
 860 | 3. 叙事手法（25分）：是否符合指定的叙事风格，叙事节奏是否合适
 861 | 4. 语言特色（25分）：是否符合指定的语言风格，用词是否准确规范
 862 | 
 863 | ===== 输出格式 =====
 864 | [总体评分]: <0-100分>
 865 | 
 866 | [语气一致性]: <0-25分>
 867 | [视角把控]: <0-25分>
 868 | [叙事手法]: <0-25分>
 869 | [语言特色]: <0-25分>
 870 | 
 871 | [风格问题列表]:
 872 | 1. <问题描述>
 873 | 2. <问题描述>
 874 | ...
 875 | 
 876 | [修改建议]:
 877 | <针对每个风格问题的具体修改建议>
 878 | 
 879 | [修改必要性]: <"需要修改"或"无需修改">
 880 | """
 881 | 
 882 | def get_emotion_check_prompt(
 883 |     chapter_content: str,
 884 |     chapter_outline: Dict
 885 | ) -> str:
 886 |     """生成用于检查章节情感表达的提示词"""
 887 |     return f"""请检查章节内容的情感表达：
 888 | 
 889 | [章节大纲]
 890 | {chapter_outline.get('chapter_number', '未知')}章《{chapter_outline.get('title', '未知')}》
 891 | 情感基调：{chapter_outline.get('emotion', '未知')}
 892 | 关键点：{', '.join(chapter_outline.get('key_points', []))}
 893 | 角色：{', '.join(chapter_outline.get('characters', []))}
 894 | 
 895 | [章节内容]
 896 | {chapter_content}
 897 | 
 898 | ===== 情感检查 =====
 899 | 请从以下维度评估（总分100分）：
 900 | 1. 情感基调（25分）：是否符合章节预设基调，情感变化是否自然
 901 | 2. 人物情感（25分）：情感表达是否符合人物性格，情感反应是否合理
 902 | 3. 情感互动（25分）：人物间情感交流是否自然，情感冲突是否鲜明
 903 | 4. 读者共鸣（25分）：是否容易引起情感共鸣，是否有感情真实性
 904 | 
 905 | ===== 输出格式 =====
 906 | [总体评分]: <0-100分>
 907 | 
 908 | [情感基调]: <0-25分>
 909 | [人物情感]: <0-25分>
 910 | [情感互动]: <0-25分>
 911 | [读者共鸣]: <0-25分>
 912 | 
 913 | [情感问题列表]:
 914 | 1. <问题描述>
 915 | 2. <问题描述>
 916 | ...
 917 | 
 918 | [修改建议]:
 919 | <针对每个情感问题的具体修改建议>
 920 | 
 921 | [修改必要性]: <"需要修改"或"无需修改">
 922 | """
 923 | 
 924 | def get_imitation_prompt(
 925 |     original_text: str,
 926 |     style_examples: List[str],
 927 |     extra_prompt: Optional[str] = None
 928 | ) -> str:
 929 |     """
 930 |     生成用于仿写任务的提示词
 931 |     
 932 |     Args:
 933 |         original_text: 需要被重写的原始文本
 934 |         style_examples: 从风格范文中提取的、用于模仿的文本片段
 935 |         extra_prompt: 用户额外的指令
 936 |     """
 937 |     
 938 |     # 将风格范例格式化
 939 |     separator = "\n\n---\n\n"
 940 |     formatted_examples = separator.join(style_examples)
 941 |     
 942 |     prompt = f"""你是一位顶级的文体学家和模仿大师。你的任务是严格按照提供的「风格范例」，重写「原始文本」。
 943 | 
 944 | 核心要求：
 945 | 1. **保留核心意义**：必须完整、准确地保留「原始文本」的所有关键信息、情节和逻辑。不能增加或删减核心意思。
 946 | 2. **迁移文笔风格**：必须彻底地模仿「风格范例」的笔触。这包括：
 947 |    - **词汇选择**：使用与范例相似的词汇偏好（例如，是用"华丽辞藻"还是"朴实白描"）
 948 |    - **句式结构**：模仿范例的长短句搭配、倒装、排比等句式特点
 949 |    - **叙事节奏**：模仿范例是"快节奏推进"还是"慢节奏铺陈"
 950 |    - **情感基调**：模仿范例的整体情绪色彩（如冷静、激昂、悲伤等）
 951 |    - **标点符号用法**：注意范例中特殊标点（如破折号、省略号）的使用习惯
 952 | 
 953 | ---
 954 | 
 955 | [风格范例]
 956 | {formatted_examples}
 957 | 
 958 | ---
 959 | 
 960 | [原始文本]
 961 | {original_text}
 962 | 
 963 | ---
 964 | 
 965 | [额外要求]
 966 | {extra_prompt if extra_prompt else "无"}
 967 | 
 968 | ------
 969 | 
 970 | 现在，请开始仿写。直接输出仿写后的正文，不要包含任何解释或标题。"""
 971 |     
 972 |     return prompt
 973 | 
 974 | def get_enhanced_chapter_prompt(
 975 |     outline: Dict,
 976 |     references: Dict,
 977 |     extra_prompt: str = "",
 978 |     context_info: str = "",
 979 |     story_config: Optional[Dict] = None,
 980 |     sync_info: Optional[Dict] = None,
 981 |     ai_score_target: float = 20.0,
 982 |     current_dialogue_ratio: float = 0.0
 983 | ) -> str:
 984 |     """生成增强版章节内容提示词，针对性降低AI浓度"""
 985 |     
 986 |     # 获取基本信息
 987 |     novel_number = outline.get('chapter_number', 0)
 988 |     chapter_title = outline.get('title', '未知')
 989 |     
 990 |     # 格式化关键情节点
 991 |     key_points_list = outline.get('key_points', [])
 992 |     key_points_display = chr(10).join([f"- {point}" for point in key_points_list])
 993 |     
 994 |     # 其他信息
 995 |     characters = ', '.join(outline.get('characters', []))
 996 |     settings = ', '.join(outline.get('settings', []))
 997 |     conflicts = ', '.join(outline.get('conflicts', []))
 998 | 
 999 |     base_prompt = f"""你是一名专业网文作者，熟知起点中文网、番茄小说网、晋江文学城的网文创作技巧，你的文笔节奏、表达富于变化，语句总是超出预测，同时扣人心弦。你特别擅长创作节奏紧凑、对话生动、且极具人性化特色的网络小说。
1000 | 
1001 | ⚠️ 特别重要：本次创作必须通过专业AI检测工具的验证，目标AI检测结果低于{ai_score_target:.0f}%。
1002 | 
1003 | [AI检测优化要求]
1004 | 1. **对话主导原则**（最重要）：
1005 |    - 对话比例必须达到45%以上（当前仅{current_dialogue_ratio:.1%}）
1006 |    - 用对话推进剧情，大幅减少叙述
1007 |    - 每个人物说话要有明显的个人特色
1008 |    - 加入"呃"、"那个"、"怎么说呢"等停顿词
1009 | 
1010 | 2. **词汇彻底去 AI化**：
1011 |    - 禁止使用：伴随着、与此同时、紧接着、毫无疑问、显而易见
1012 |    - 优先使用：然后、接着、突然、结果、没想到、谁知道
1013 |    - 多用口语化表达：这玩意儿、什么鬼、搞什么
1014 | 
1015 | [章节信息]
1016 | 章节号: {novel_number}
1017 | 标题: {chapter_title}
1018 | 关键情节点:
1019 | {key_points_display}
1020 | 
1021 | [核心元素]
1022 | 人物: {characters}
1023 | 场景: {settings}
1024 | 冲突: {conflicts}
1025 | 
1026 | [输出要求]
1027 | 1. 仅返回章节正文文本，以“第{novel_number}章 {chapter_title}”开头。
1028 | 2. 严格使用简体中文及中文标点符号“”。
1029 | 3. 确保对话比例达到45%以上。
1030 | 4. 必须大量使用口语化表达和自然停顿。"""
1031 | 
1032 |     # 生成自适应人性化提示词
1033 |     adaptive_prompt = generate_adaptive_humanization_prompt(
1034 |         ai_score=100 - ai_score_target,
1035 |         dialogue_ratio=current_dialogue_ratio,
1036 |         content_type="chapter"
1037 |     )
1038 |     base_prompt += f"{chr(10)}{adaptive_prompt}"
1039 | 
1040 |     # 添加额外要求
1041 |     if extra_prompt:
1042 |         base_prompt += f"{chr(10)}[额外要求]{chr(10)}{extra_prompt}"
1043 | 
1044 |     # 添加上下文信息
1045 |     if context_info:
1046 |         max_context_length = 1000
1047 |         if len(context_info) > max_context_length:
1048 |             context_info = context_info[-max_context_length:] + "...(前文已省略)"
1049 |         base_prompt += f"{chr(10)}[上下文信息]{chr(10)}{context_info}"
1050 | 
1051 |     return base_prompt
1052 | 


--------------------------------------------------------------------------------
/src/generators/title_generator.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import logging
  4 | import datetime
  5 | from typing import Dict, List, Optional
  6 | 
  7 | class TitleGenerator:
  8 |     """小说标题、梗概和封面提示词生成器"""
  9 |     
 10 |     def __init__(self, model, output_dir: str = "data/marketing"):
 11 |         """
 12 |         初始化生成器
 13 |         
 14 |         Args:
 15 |             model: AI模型实例（支持generate方法）
 16 |             output_dir: 输出目录
 17 |         """
 18 |         self.model = model
 19 |         self.output_dir = output_dir
 20 |         os.makedirs(output_dir, exist_ok=True)
 21 |         
 22 |     def generate_titles(self, novel_type: str, theme: str, keywords: List[str], 
 23 |                          character_names: List[str], existing_outline: Optional[str] = None) -> Dict[str, str]:
 24 |         """
 25 |         生成符合各平台风格的小说标题
 26 |         
 27 |         Args:
 28 |             novel_type: 小说类型
 29 |             theme: 小说主题
 30 |             keywords: 关键词列表
 31 |             character_names: 主要角色名列表
 32 |             existing_outline: 已有的小说大纲或摘要
 33 |             
 34 |         Returns:
 35 |             Dict[str, str]: 平台名称到标题的映射
 36 |         """
 37 |         platforms = ["番茄小说", "七猫小说", "起点中文网", "书旗小说", "掌阅"]
 38 |         platform_styles = {
 39 |             "番茄小说": "吸引眼球、有悬念、通常包含数字",
 40 |             "七猫小说": "有仙侠/玄幻色彩、略带文艺气息",
 41 |             "起点中文网": "有气势、展现成长、争霸、逆袭等主题",
 42 |             "书旗小说": "简洁有力、能体现小说主要矛盾",
 43 |             "掌阅": "注重人物关系、情感冲突、容易记忆"
 44 |         }
 45 |         
 46 |         prompt = f"""
 47 |         请帮我为一部小说生成5个不同风格的标题，每个标题对应一个不同的阅读平台风格。
 48 |         
 49 |         【小说信息】
 50 |         类型：{novel_type}
 51 |         主题：{theme}
 52 |         关键词：{', '.join(keywords)}
 53 |         主要角色：{', '.join(character_names)}
 54 |         {'大纲摘要：' + existing_outline if existing_outline else ''}
 55 |         
 56 |         【要求】
 57 |         1. 每个标题必须在15个汉字以内
 58 |         2. 标题要有吸引力，符合该平台的风格特点
 59 |         3. 标题要体现小说的核心卖点和情感
 60 |         4. 避免过于俗套或老套的表达
 61 |         5. 标题要与小说类型、主题相符
 62 |         
 63 |         【不同平台的风格特点】
 64 |         {chr(10).join([f"- {platform}: {style}" for platform, style in platform_styles.items()])}
 65 |         
 66 |         请按照以下格式输出结果（仅输出标题，不要解释）：
 67 |         番茄小说：【标题】
 68 |         七猫小说：【标题】
 69 |         起点中文网：【标题】
 70 |         书旗小说：【标题】
 71 |         掌阅：【标题】
 72 |         """
 73 |         
 74 |         try:
 75 |             response = self.model.generate(prompt)
 76 |             titles = {}
 77 |             
 78 |             for line in response.strip().split('\n'):
 79 |                 if ':' in line or '：' in line:
 80 |                     parts = line.replace('：', ':').split(':', 1)
 81 |                     platform = parts[0].strip()
 82 |                     title = parts[1].strip()
 83 |                     
 84 |                     # 清理可能的多余字符
 85 |                     for char in ['【', '】', '"', '"']:
 86 |                         title = title.replace(char, '')
 87 |                     
 88 |                     titles[platform] = title
 89 |             
 90 |             return titles
 91 |         except Exception as e:
 92 |             logging.error(f"生成标题时出错: {str(e)}")
 93 |             return {platform: f"未能生成{platform}标题" for platform in platforms}
 94 |             
 95 |     def generate_summary(self, novel_type: str, theme: str, titles: Dict[str, str], 
 96 |                           summaries: List[str] = None) -> str:
 97 |         """
 98 |         生成200字以内的故事梗概
 99 |         
100 |         Args:
101 |             novel_type: 小说类型
102 |             theme: 小说主题
103 |             titles: 生成的标题
104 |             summaries: 已有的章节摘要列表
105 |             
106 |         Returns:
107 |             str: 生成的故事梗概
108 |         """
109 |         # 预处理摘要部分，避免在f-string表达式中使用反斜杠
110 |         summary_section = ""
111 |         if summaries:
112 |             summary_section = "【已有章节摘要】\n" + "\n".join(summaries)
113 |         
114 |         prompt = f"""
115 |         请为一部小说创作一段200字以内的故事梗概，这段梗概将用于小说的宣传推广。
116 |         
117 |         【小说信息】
118 |         类型：{novel_type}
119 |         主题：{theme}
120 |         可选标题：{', '.join(titles.values())}
121 |         
122 |         {summary_section}
123 |         
124 |         【要求】
125 |         1. 梗概必须控制在200字以内
126 |         2. 需要突出小说的核心冲突和主要卖点
127 |         3. 语言要生动有吸引力，能吸引读者点击阅读
128 |         4. 适当埋下悬念，引发读者的好奇心
129 |         5. 不要剧透小说的关键转折和结局
130 |         6. 要符合{novel_type}类小说的读者口味
131 |         7. 必须全部使用中文，不能包含任何英文单词或短语
132 |         8. 如果需要使用外来词，请使用对应的中文翻译
133 |         
134 |         请直接输出梗概文字，不要添加其他说明或标题。
135 |         """
136 |         
137 |         try:
138 |             summary = self.model.generate(prompt)
139 |             # 确保不超过200字
140 |             if len(summary) > 200:
141 |                 prompt_trim = f"""
142 |                 请将以下梗概缩减到200字以内，保持核心内容和吸引力：
143 |                 
144 |                 {summary}
145 |                 
146 |                 请确保使用纯中文，不包含任何英文单词。
147 |                 """
148 |                 summary = self.model.generate(prompt_trim)
149 |                 
150 |             return summary.strip()
151 |         except Exception as e:
152 |             logging.error(f"生成梗概时出错: {str(e)}")
153 |             return "未能生成小说梗概"
154 |             
155 |     def generate_cover_prompts(self, novel_type: str, titles: Dict[str, str], 
156 |                                  summary: str) -> Dict[str, str]:
157 |         """
158 |         生成封面提示词
159 |         
160 |         Args:
161 |             novel_type: 小说类型
162 |             titles: 生成的标题
163 |             summary: 故事梗概
164 |             
165 |         Returns:
166 |             Dict[str, str]: 标题到封面提示词的映射
167 |         """
168 |         # 使用所有标题生成封面提示词
169 |         title_list = list(titles.values())
170 |         platforms = list(titles.keys())
171 |         
172 |         # 第一步：生成每个平台的具体风格描述
173 |         style_prompt = f"""
174 |         请为以下小说标题生成每个平台的具体风格描述，用于后续生成封面提示词。
175 |         
176 |         【小说信息】
177 |         类型：{novel_type}
178 |         梗概：{summary}
179 |         标题：
180 |         {chr(10).join([f"{i+1}. {title}" for i, title in enumerate(title_list)])}
181 |         
182 |         【平台风格要求】
183 |         1. 番茄小说：现代感强、色彩鲜艳、视觉冲击力强
184 |         2. 七猫小说：仙侠风格、飘逸唯美、意境深远
185 |         3. 起点中文网：气势磅礴、热血沸腾、画面震撼
186 |         4. 书旗小说：简洁大气、重点突出、富有张力
187 |         5. 掌阅：细腻唯美、情感丰富、画面精致
188 |         
189 |         请为每个平台生成一个独特的风格描述，包含：
190 |         1. 人物特点（外貌、气质、表情等）
191 |         2. 场景特点（环境、氛围、光线等）
192 |         3. 色彩风格（主色调、色彩搭配等）
193 |         4. 构图特点（画面布局、重点等）
194 |         5. 特殊效果（光效、粒子、氛围等）
195 |         
196 |         请按照以下格式输出（每行一个平台，使用冒号分隔）：
197 |         平台名称：风格描述1、风格描述2、风格描述3、风格描述4、风格描述5
198 |         """
199 |         
200 |         try:
201 |             # 获取风格描述
202 |             style_response = self.model.generate(style_prompt)
203 |             logging.info(f"生成的风格描述：\n{style_response}")
204 |             
205 |             # 第二步：根据风格描述生成最终提示词
206 |             prompt = f"""
207 |             请根据以下风格描述，为每个平台生成具体的封面提示词。
208 |             
209 |             【小说信息】
210 |             类型：{novel_type}
211 |             梗概：{summary}
212 |             标题：
213 |             {chr(10).join([f"{i+1}. {title}" for i, title in enumerate(title_list)])}
214 |             
215 |             【风格描述】
216 |             {style_response}
217 |             
218 |             【要求】
219 |             1. 根据每个平台的风格描述生成具体的提示词
220 |             2. 提示词必须全部使用中文，不包含任何英文单词
221 |             3. 每个提示词至少包含6个要素，用顿号分隔
222 |             4. 提示词要能反映出小说的类型和氛围
223 |             5. 关键细节要与标题内涵相匹配
224 |             6. 每组提示词需要简洁明了
225 |             7. 不同平台的提示词必须完全不同
226 |             
227 |             请按照以下格式输出（每行一个平台，使用冒号分隔）：
228 |             平台名称：提示词1、提示词2、提示词3、提示词4、提示词5、提示词6
229 |             """
230 |             
231 |             response = self.model.generate(prompt)
232 |             logging.info(f"生成的原始响应：\n{response}")
233 |             cover_prompts = {}
234 |             
235 |             # 解析响应并匹配标题与平台
236 |             lines = response.strip().split('\n')
237 |             for line in lines:
238 |                 line = line.strip()
239 |                 if not line:
240 |                     continue
241 |                 
242 |                 # 尝试不同的分隔符
243 |                 if ':' in line:
244 |                     platform, prompt_text = line.split(':', 1)
245 |                 elif '：' in line:
246 |                     platform, prompt_text = line.split('：', 1)
247 |                 else:
248 |                     continue
249 |                 
250 |                 platform = platform.strip()
251 |                 prompt_text = prompt_text.strip()
252 |                 
253 |                 # 清理可能的多余字符
254 |                 for char in ['【', '】', '"', '"', '*']:
255 |                     prompt_text = prompt_text.replace(char, '')
256 |                 
257 |                 # 验证提示词是否有效
258 |                 if prompt_text and len(prompt_text.split('、')) >= 6 and platform in platforms:
259 |                     cover_prompts[platform] = prompt_text
260 |                     logging.info(f"成功解析平台 {platform} 的提示词：{prompt_text}")
261 |             
262 |             # 检查是否所有平台都有有效的提示词
263 |             missing_platforms = [p for p in platforms if p not in cover_prompts]
264 |             if missing_platforms:
265 |                 logging.warning(f"以下平台缺少有效的提示词：{missing_platforms}")
266 |             
267 |             if not missing_platforms:
268 |                 return cover_prompts
269 |             
270 |             # 如果有缺失的平台，生成默认提示词
271 |             for platform in missing_platforms:
272 |                 title = titles[platform]
273 |                 if platform == "番茄小说":
274 |                     cover_prompts[platform] = f"俊朗青年、现代修仙服、眼神坚毅、都市高楼背景、霓虹光效、2:3竖版构图"
275 |                 elif platform == "七猫小说":
276 |                     cover_prompts[platform] = f"仙气飘飘的男子、古风长袍、云雾缭绕、仙山背景、水墨意境、2:3竖版构图"
277 |                 elif platform == "起点中文网":
278 |                     cover_prompts[platform] = f"英气逼人的少年、战甲、金光万丈、战场背景、热血沸腾、2:3竖版构图"
279 |                 elif platform == "书旗小说":
280 |                     cover_prompts[platform] = f"气质沉稳的男子、道袍、水墨风格、道观背景、简洁大气、2:3竖版构图"
281 |                 else:  # 掌阅
282 |                     cover_prompts[platform] = f"温润如玉的男子、儒雅长衫、月光如水、庭院背景、细腻唯美、2:3竖版构图"
283 |             
284 |             return cover_prompts
285 |             
286 |         except Exception as e:
287 |             logging.error(f"生成封面提示词时出错: {str(e)}")
288 |             # 如果出错，使用默认提示词
289 |             return {platform: f"年轻男子、修仙服饰、{title}、2:3竖版构图、幻彩光效" 
290 |                    for platform, title in titles.items()}
291 |             
292 |     def save_to_file(self, titles: Dict[str, str], summary: str, 
293 |                      cover_prompts: Dict[str, str]) -> str:
294 |         """
295 |         保存生成的内容到文件
296 |         
297 |         Args:
298 |             titles: 生成的标题
299 |             summary: 故事梗概
300 |             cover_prompts: 封面提示词
301 |             
302 |         Returns:
303 |             str: 保存的文件路径
304 |         """
305 |         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
306 |         filename = os.path.join(self.output_dir, f"novel_marketing_{timestamp}.json")
307 |         
308 |         data = {
309 |             "timestamp": timestamp,
310 |             "titles": titles,
311 |             "summary": summary,
312 |             "cover_prompts": cover_prompts
313 |         }
314 |         
315 |         with open(filename, 'w', encoding='utf-8') as f:
316 |             json.dump(data, f, ensure_ascii=False, indent=2)
317 |             
318 |         # 同时保存一个Markdown版本，方便阅读
319 |         md_filename = os.path.join(self.output_dir, f"novel_marketing_{timestamp}.md")
320 |         with open(md_filename, 'w', encoding='utf-8') as f:
321 |             f.write("# 小说营销材料\n\n")
322 |             f.write(f"生成时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
323 |             
324 |             f.write("## 标题方案\n\n")
325 |             for platform, title in titles.items():
326 |                 f.write(f"- **{platform}**: {title}\n")
327 |             
328 |             f.write("\n## 故事梗概\n\n")
329 |             f.write(f"{summary}\n\n")
330 |             
331 |             f.write("## 封面提示词\n\n")
332 |             for platform, prompt in cover_prompts.items():
333 |                 title = titles.get(platform, "")
334 |                 f.write(f"### {platform}（{title}）\n")
335 |                 f.write(f"{prompt}\n\n")
336 |                 
337 |         return filename
338 |         
339 |     def one_click_generate(self, novel_config: Dict, chapter_summaries: List[str] = None) -> Dict:
340 |         """
341 |         一键生成所有营销内容
342 |         
343 |         Args:
344 |             novel_config: 小说配置信息
345 |             chapter_summaries: 章节摘要列表
346 |             
347 |         Returns:
348 |             Dict: 生成的所有内容
349 |         """
350 |         # 提取小说信息
351 |         novel_type = novel_config.get("type", "玄幻")
352 |         theme = novel_config.get("theme", "修真逆袭")
353 |         keywords = novel_config.get("keywords", [])
354 |         character_names = novel_config.get("main_characters", [])
355 |         
356 |         # 如果没有提供关键词，从主题中提取
357 |         if not keywords:
358 |             keywords = theme.split()
359 |             
360 |         # 如果没有提供角色名，使用默认值
361 |         if not character_names:
362 |             character_names = ["主角", "对手", "师傅"]
363 |             
364 |         # 提取大纲摘要
365 |         existing_outline = novel_config.get("outline_summary", "")
366 |         
367 |         # 1. 生成标题
368 |         titles = self.generate_titles(novel_type, theme, keywords, character_names, existing_outline)
369 |         logging.info(f"已生成{len(titles)}个标题")
370 |         
371 |         # 2. 生成梗概
372 |         summary = self.generate_summary(novel_type, theme, titles, chapter_summaries)
373 |         logging.info(f"已生成故事梗概，长度：{len(summary)}字")
374 |         
375 |         # 3. 生成封面提示词
376 |         cover_prompts = self.generate_cover_prompts(novel_type, titles, summary)
377 |         logging.info(f"已生成{len(cover_prompts)}个封面提示词")
378 |         
379 |         # 4. 保存到文件
380 |         saved_file = self.save_to_file(titles, summary, cover_prompts)
381 |         logging.info(f"已保存到文件：{saved_file}")
382 |         
383 |         return {
384 |             "titles": titles,
385 |             "summary": summary,
386 |             "cover_prompts": cover_prompts,
387 |             "saved_file": saved_file
388 |         } 


--------------------------------------------------------------------------------
/src/knowledge_base/knowledge_base.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pickle
  3 | import hashlib
  4 | import faiss
  5 | import numpy as np
  6 | import jieba
  7 | from typing import List, Dict, Tuple, Optional
  8 | from dataclasses import dataclass
  9 | import logging
 10 | from FlagEmbedding import FlagReranker
 11 | 
@dataclass
class TextChunk:
    """One chunk of reference text plus where it came from.

    Instances are created by ``KnowledgeBase._chunk_text``.
    """
    content: str  # the chunk's text
    chapter: int  # chapter number assigned by the chunker
    start_idx: int  # index of the chunk's first token within its chapter's token list
    end_idx: int  # index of the chunk's last token within its chapter's token list
    metadata: Dict  # chunker fills "chapter_content", "previous_context", "following_context"
 20 | 
 21 | class KnowledgeBase:
 22 |     def __init__(self, config: Dict, embedding_model, reranker_model_name: str = None):
 23 |         self.config = config
 24 |         self.embedding_model = embedding_model
 25 |         self.chunks: List[TextChunk] = []
 26 |         self.index = None
 27 |         self.cache_dir = config["cache_dir"]
 28 |         self.is_built = False  # 添加构建状态标志
 29 |         os.makedirs(self.cache_dir, exist_ok=True)
 30 |         self.reranker_model_name = reranker_model_name
 31 |         self.reranker = None
 32 |         
 33 |     def _get_cache_path(self, text: str) -> str:
 34 |         """获取缓存文件路径"""
 35 |         text_hash = hashlib.md5(text.encode()).hexdigest()
 36 |         return os.path.join(self.cache_dir, f"kb_{text_hash}.pkl")
 37 |         
 38 |     def _chunk_text(self, text: str) -> List[TextChunk]:
 39 |         """将文本分割成块"""
 40 |         chunk_size = self.config["chunk_size"]
 41 |         overlap = self.config["chunk_overlap"]
 42 |         chunks = []
 43 |         
 44 |         # 按章节分割文本
 45 |         chapters = text.split("第")
 46 |         logging.info(f"文本分割为 {len(chapters)} 个章节")
 47 |         
 48 |         # 如果没有找到章节标记，将整个文本作为一个章节处理
 49 |         if len(chapters) <= 1:
 50 |             chapters = [text]
 51 |             start_idx = 0
 52 |         else:
 53 |             # 如果找到了章节标记，跳过第一个空分片
 54 |             chapters = [f"第{chapter}" for chapter in chapters[1:]]
 55 |             start_idx = 1
 56 |             
 57 |         for chapter_idx, chapter_content in enumerate(chapters, start_idx):
 58 |             try:
 59 |                 # 处理单个章节
 60 |                 sentences = list(jieba.cut(chapter_content, cut_all=False))
 61 |                 
 62 |                 current_chunk = []
 63 |                 current_length = 0
 64 |                 chunk_start_idx = 0
 65 |                 
 66 |                 for i, sentence in enumerate(sentences):
 67 |                     current_chunk.append(sentence)
 68 |                     current_length += len(sentence)
 69 |                     
 70 |                     # 当达到目标长度时创建新块
 71 |                     if current_length >= chunk_size:
 72 |                         chunk_text = "".join(current_chunk)
 73 |                         if chunk_text.strip():  # 确保块不为空
 74 |                             chunk = TextChunk(
 75 |                                 content=chunk_text,
 76 |                                 chapter=chapter_idx,
 77 |                                 start_idx=chunk_start_idx,
 78 |                                 end_idx=i,
 79 |                                 metadata={
 80 |                                     "chapter_content": chapter_content[:100] + "...",  # 只保存章节开头
 81 |                                     "previous_context": "".join(sentences[max(0, chunk_start_idx-10):chunk_start_idx]),
 82 |                                     "following_context": "".join(sentences[i+1:min(len(sentences), i+11)])
 83 |                                 }
 84 |                             )
 85 |                             chunks.append(chunk)
 86 |                             logging.debug(f"创建文本块: 章节={chapter_idx}, 长度={len(chunk_text)}")
 87 |                         
 88 |                         # 保留重叠部分
 89 |                         overlap_start = max(0, len(current_chunk) - overlap)
 90 |                         current_chunk = current_chunk[overlap_start:]
 91 |                         current_length = sum(len(t) for t in current_chunk)
 92 |                         chunk_start_idx = i - len(current_chunk) + 1
 93 |                 
 94 |                 # 处理最后一个块
 95 |                 if current_chunk:
 96 |                     chunk_text = "".join(current_chunk)
 97 |                     if chunk_text.strip():
 98 |                         chunk = TextChunk(
 99 |                             content=chunk_text,
100 |                             chapter=chapter_idx,
101 |                             start_idx=chunk_start_idx,
102 |                             end_idx=len(sentences)-1,
103 |                             metadata={
104 |                                 "chapter_content": chapter_content[:100] + "...",
105 |                                 "previous_context": "".join(sentences[max(0, chunk_start_idx-10):chunk_start_idx]),
106 |                                 "following_context": ""
107 |                             }
108 |                         )
109 |                         chunks.append(chunk)
110 |                 
111 |                 # 定期清理内存
112 |                 if chapter_idx % 10 == 0:
113 |                     del sentences
114 |                     import gc
115 |                     gc.collect()
116 |                     
117 |             except Exception as e:
118 |                 logging.error(f"处理第 {chapter_idx} 章时出错: {str(e)}")
119 |                 continue
120 |             
121 |         logging.info(f"总共创建了 {len(chunks)} 个文本块")
122 |         return chunks
123 |         
124 |     def _find_latest_temp_file(self, cache_path: str) -> Optional[Tuple[str, int]]:
125 |         """查找最新的临时文件"""
126 |         temp_files = []
127 |         for f in os.listdir(self.cache_dir):
128 |             if f.startswith(os.path.basename(cache_path) + ".temp_"):
129 |                 try:
130 |                     progress = int(f.split("_")[-1])
131 |                     temp_files.append((os.path.join(self.cache_dir, f), progress))
132 |                 except ValueError:
133 |                     continue
134 |         return max(temp_files, key=lambda x: x[1]) if temp_files else None
135 | 
136 |     def _load_from_temp(self, temp_file: str) -> Tuple[List[TextChunk], List]:
137 |         """从临时文件加载进度"""
138 |         try:
139 |             with open(temp_file, 'rb') as f:
140 |                 temp_data = pickle.load(f)
141 |                 return temp_data['chunks'], temp_data['vectors']
142 |         except Exception as e:
143 |             logging.error(f"加载临时文件失败: {str(e)}")
144 |             return [], []
145 | 
146 |     def build(self, text: str, force_rebuild: bool = False):
147 |         """构建知识库"""
148 |         cache_path = self._get_cache_path(text)
149 |         
150 |         # 检查缓存
151 |         if not force_rebuild and os.path.exists(cache_path):
152 |             try:
153 |                 with open(cache_path, 'rb') as f:
154 |                     cached_data = pickle.load(f)
155 |                 
156 |                 # 检查缓存格式兼容性
157 |                 if 'original_text' in cached_data and 'embedding_model_name' in cached_data:
158 |                     # 新格式缓存
159 |                     cached_model_name = cached_data.get('embedding_model_name', '')
160 |                     current_model_name = self.embedding_model.model_name
161 |                     
162 |                     if cached_model_name != current_model_name:
163 |                         logging.warning(f"嵌入模型配置已更改：缓存使用 {cached_model_name}，当前使用 {current_model_name}")
164 |                         logging.info("将重新构建知识库以使用新的嵌入模型配置")
165 |                         force_rebuild = True
166 |                     else:
167 |                         self.index = cached_data['index']
168 |                         self.chunks = cached_data['chunks']
169 |                         self.is_built = True
170 |                         logging.info("成功从缓存加载知识库")
171 |                         return
172 |                 else:
173 |                     # 旧格式缓存，检查维度兼容性
174 |                     if 'index' in cached_data and 'chunks' in cached_data:
175 |                         self.index = cached_data['index']
176 |                         self.chunks = cached_data['chunks']
177 |                         self.is_built = True
178 |                         logging.info("成功从旧格式缓存加载知识库")
179 |                         return
180 |                     else:
181 |                         logging.warning("缓存格式不完整，将重新构建")
182 |                         force_rebuild = True
183 |                         
184 |             except Exception as e:
185 |                 logging.warning(f"加载缓存失败: {e}")
186 |                 force_rebuild = True
187 |         
188 |         # 检查是否有临时文件可以恢复
189 |         temp_file_info = None if force_rebuild else self._find_latest_temp_file(cache_path)
190 |         start_idx = 0
191 |         vectors = []
192 |         
193 |         if temp_file_info:
194 |             temp_file, progress = temp_file_info
195 |             logging.info(f"发现临时文件，尝试从进度 {progress} 恢复...")
196 |             self.chunks, vectors = self._load_from_temp(temp_file)
197 |             if self.chunks and vectors:
198 |                 start_idx = progress
199 |                 logging.info(f"成功恢复到进度 {progress}，继续处理剩余内容")
200 |             else:
201 |                 logging.warning("临时文件加载失败，将从头开始处理")
202 |                 self.chunks = self._chunk_text(text)
203 |         else:
204 |             # 分块
205 |             self.chunks = self._chunk_text(text)
206 |         
207 |         logging.info(f"创建了 {len(self.chunks)} 个文本块")
208 |         
209 |         # 分批获取嵌入向量
210 |         batch_size = 100  # 每批处理100个文本块
211 |         
212 |         for i in range(start_idx, len(self.chunks), batch_size):
213 |             batch_chunks = self.chunks[i:i+batch_size]
214 |             batch_vectors = []
215 |             
216 |             for j, chunk in enumerate(batch_chunks):
217 |                 try:
218 |                     vector = self.embedding_model.embed(chunk.content)
219 |                     if vector is None or len(vector) == 0:
220 |                         logging.error(f"文本块 {i+j} 返回空向量")
221 |                         continue
222 |                     batch_vectors.append(vector)
223 |                     logging.info(f"生成文本块 {i+j} 的向量，维度: {len(vector)}")
224 |                 except Exception as e:
225 |                     logging.error(f"生成文本块 {i+j} 的向量时出错: {e}")
226 |                     continue
227 |             
228 |             vectors.extend(batch_vectors)
229 |             
230 |             # 定期保存中间结果
231 |             if i % 1000 == 0 and i > 0:
232 |                 temp_cache_path = cache_path + f".temp_{i}"
233 |                 with open(temp_cache_path, 'wb') as f:
234 |                     pickle.dump({
235 |                         'chunks': self.chunks[:i+batch_size],
236 |                         'vectors': vectors
237 |                     }, f)
238 |                 logging.info(f"保存临时进度到 {temp_cache_path}")
239 |         
240 |         if not vectors:
241 |             raise ValueError("没有生成有效的向量")
242 |         
243 |         # 构建索引
244 |         dimension = len(vectors[0])
245 |         logging.info(f"构建 FAISS 索引，维度 {dimension}")
246 |         self.index = faiss.IndexFlatL2(dimension)
247 |         vectors_array = np.array(vectors).astype('float32')
248 |         self.index.add(vectors_array)
249 |         
250 |         # 保存缓存
251 |         with open(cache_path, 'wb') as f:
252 |             pickle.dump({
253 |                 'index': self.index,
254 |                 'chunks': self.chunks,
255 |                 'original_text': text,  # 保存原始文本以便重新构建
256 |                 'embedding_model_name': self.embedding_model.model_name,  # 保存嵌入模型名称
257 |                 'embedding_dimension': dimension  # 保存嵌入维度
258 |             }, f)
259 |         logging.info("知识库构建完成并已缓存")
260 |         
261 |         # 清理临时文件
262 |         if not self.config.get("keep_temp_files", False):  # 添加配置选项来控制是否保留临时文件
263 |             for f in os.listdir(self.cache_dir):
264 |                 if f.startswith(os.path.basename(cache_path) + ".temp_"):
265 |                     try:
266 |                         os.remove(os.path.join(self.cache_dir, f))
267 |                     except Exception as e:
268 |                         logging.warning(f"清理临时文件 {f} 失败: {e}")
269 | 
270 |     def search(self, query: str, k: int = 5) -> List[str]:
271 |         """搜索相关内容"""
272 |         if not self.index:
273 |             logging.error("知识库索引未构建")
274 |             raise ValueError("Knowledge base not built yet")
275 |             
276 |         query_vector = self.embedding_model.embed(query)
277 |         
278 |         if query_vector is None:
279 |             logging.error("嵌入模型返回空向量")
280 |             return []
281 |             
282 |         # 搜索最相似的文本块
283 |         query_vector_array = np.array([query_vector]).astype('float32')
284 |         distances, indices = self.index.search(query_vector_array, k)
285 |         
286 |         # 返回相关文本内容
287 |         results = []
288 |         for idx in indices[0]:
289 |             if idx < len(self.chunks):
290 |                 results.append(self.chunks[idx].content)
291 |         return results
292 | 
293 |     def get_all_references(self) -> Dict[str, str]:
294 |         """获取所有参考内容"""
295 |         if not self.chunks:
296 |             return {}
297 |             
298 |         references = {}
299 |         for i, chunk in enumerate(self.chunks):
300 |             key = f"ref_{i+1}"
301 |             references[key] = chunk.content
302 |             
303 |             # 为了避免返回过多数据，只返回前10个参考
304 |             if i >= 9:
305 |                 break
306 |                 
307 |         return references
308 |         
309 |     def get_context(self, chunk: TextChunk, window_size: int = 2) -> Dict:
310 |         """获取文本块的上下文"""
311 |         chapter = chunk.chapter
312 |         relevant_chunks = [c for c in self.chunks if c.chapter == chapter]
313 |         
314 |         try:
315 |             chunk_idx = relevant_chunks.index(chunk)
316 |         except ValueError:
317 |             return {"previous_chunks": [], "next_chunks": [], "chapter_summary": ""}
318 |         
319 |         context = {
320 |             "previous_chunks": [],
321 |             "next_chunks": [],
322 |             "chapter_summary": chunk.metadata.get("chapter_content", "")
323 |         }
324 |         
325 |         # 获取前文
326 |         start_idx = max(0, chunk_idx - window_size)
327 |         context["previous_chunks"] = relevant_chunks[start_idx:chunk_idx]
328 |         
329 |         # 获取后文
330 |         end_idx = min(len(relevant_chunks), chunk_idx + window_size + 1)
331 |         context["next_chunks"] = relevant_chunks[chunk_idx + 1:end_idx]
332 |         
333 |         return context 
334 | 
335 |     def build_from_files(self, file_paths: List[str], force_rebuild: bool = False):
336 |         """从多个文件构建知识库"""
337 |         combined_text = ""
338 |         for file_path in file_paths:
339 |             try:
340 |                 with open(file_path, 'r', encoding='utf-8') as f:
341 |                     combined_text += f.read() + "\n\n"
342 |                 logging.info(f"已加载文件: {file_path}")
343 |             except Exception as e:
344 |                 logging.error(f"加载文件 {file_path} 失败: {str(e)}")
345 |                 continue
346 |         
347 |         if not combined_text.strip():
348 |             raise ValueError("所有参考文件加载失败，知识库内容为空")
349 |             
350 |         return self.build(combined_text, force_rebuild) 
351 | 
352 |     def build_from_texts(self, texts: List[str], cache_dir: Optional[str] = None) -> None:
353 |         """从文本列表构建知识库
354 |         
355 |         Args:
356 |             texts: 文本列表，例如章节内容列表
357 |             cache_dir: 缓存目录，如果提供则使用该目录，否则使用默认缓存目录
358 |         """
359 |         if cache_dir:
360 |             old_cache_dir = self.cache_dir
361 |             self.cache_dir = cache_dir
362 |             os.makedirs(self.cache_dir, exist_ok=True)
363 |         
364 |         try:
365 |             # 合并所有文本，加上章节标记
366 |             combined_text = ""
367 |             for i, text in enumerate(texts, 1):
368 |                 combined_text += f"第{i}章\n{text}\n\n"
369 |                 
370 |             # 使用现有的构建方法
371 |             self.build(combined_text)
372 |             logging.info(f"从 {len(texts)} 个文本构建知识库成功")
373 |             
374 |         except Exception as e:
375 |             logging.error(f"从文本构建知识库时出错: {str(e)}", exc_info=True)
376 |             raise
377 |         finally:
378 |             # 恢复原始缓存目录
379 |             if cache_dir:
380 |                 self.cache_dir = old_cache_dir 
381 | 
382 |     def get_openai_config(self, model_type: str) -> Dict:
383 |         """获取OpenAI配置"""
384 |         if model_type == "reranker":
385 |             return {
386 |                 "model_name": self.reranker_model_name,
387 |                 "api_key": "",
388 |                 "base_url": "",
389 |                 "use_fp16": True,
390 |                 "retry_delay": 5
391 |             }
392 |         else:
393 |             return {} 


--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import json
  3 | import numpy as np
  4 | from typing import Optional, Dict, Any
  5 | from abc import ABC, abstractmethod
  6 | from .base_model import BaseModel
  7 | from tenacity import retry, stop_after_attempt, wait_fixed
  8 | 
  9 | class OutlineModel(BaseModel):
 10 |     """大纲生成模型"""
 11 |     def __init__(self, config: Dict[str, Any]):
 12 |         super().__init__(config)
 13 |         self._validate_config()
 14 |         # 获取实际的模型实例
 15 |         if config["type"] == "gemini":
 16 |             from .gemini_model import GeminiModel
 17 |             self.model = GeminiModel(config)
 18 |         elif config["type"] == "openai":
 19 |             from .openai_model import OpenAIModel
 20 |             self.model = OpenAIModel(config)
 21 |         elif config["type"] == "volcengine":
 22 |             from .openai_model import OpenAIModel
 23 |             self.model = OpenAIModel(config)  # 火山引擎复用OpenAI兼容实现
 24 |         else:
 25 |             raise ValueError(f"不支持的模型类型: {config['type']}")
 26 |         
 27 |     @retry(stop=stop_after_attempt(3), wait=wait_fixed(10))
 28 |     def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
 29 |         """生成章节大纲"""
 30 |         logging.info(f"使用模型 {self.model_name} 生成大纲")
 31 |         try:
 32 |             return self.model.generate(prompt, max_tokens)
 33 |         except Exception as e:
 34 |             logging.error(f"生成大纲时出错: {str(e)}")
 35 |             raise
 36 |         
 37 |     def embed(self, text: str) -> np.ndarray:
 38 |         """获取文本嵌入向量"""
 39 |         return self.model.embed(text)
 40 | 
 41 | class ContentModel(BaseModel):
 42 |     """内容生成模型"""
 43 |     def __init__(self, config: Dict[str, Any]):
 44 |         super().__init__(config)
 45 |         self._validate_config()
 46 |         # 获取实际的模型实例
 47 |         if config["type"] == "gemini":
 48 |             from .gemini_model import GeminiModel
 49 |             self.model = GeminiModel(config)
 50 |         elif config["type"] == "openai":
 51 |             from .openai_model import OpenAIModel
 52 |             self.model = OpenAIModel(config)
 53 |         elif config["type"] == "volcengine":
 54 |             from .openai_model import OpenAIModel
 55 |             self.model = OpenAIModel(config)  # 火山引擎复用OpenAI兼容实现
 56 |         else:
 57 |             raise ValueError(f"不支持的模型类型: {config['type']}")
 58 |         
 59 |     @retry(stop=stop_after_attempt(3), wait=wait_fixed(10))
 60 |     def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
 61 |         """生成章节内容"""
 62 |         logging.info(f"使用模型 {self.model_name} 生成内容")
 63 |         try:
 64 |             return self.model.generate(prompt, max_tokens)
 65 |         except Exception as e:
 66 |             logging.error(f"生成内容时出错: {str(e)}")
 67 |             raise
 68 |         
 69 |     def embed(self, text: str) -> np.ndarray:
 70 |         """获取文本嵌入向量"""
 71 |         return self.model.embed(text)
 72 | 
 73 | class EmbeddingModel(BaseModel):
 74 |     """文本嵌入模型"""
 75 |     def __init__(self, config: Dict[str, Any]):
 76 |         super().__init__(config)
 77 |         self._validate_config()
 78 |         # 获取实际的模型实例
 79 |         if config["type"] == "gemini":
 80 |             from .gemini_model import GeminiModel
 81 |             self.model = GeminiModel(config)
 82 |         elif config["type"] == "openai":
 83 |             from .openai_model import OpenAIModel
 84 |             self.model = OpenAIModel(config)
 85 |         elif config["type"] == "volcengine":
 86 |             from .openai_model import OpenAIModel
 87 |             self.model = OpenAIModel(config)  # 火山引擎复用OpenAI兼容实现
 88 |         else:
 89 |             raise ValueError(f"不支持的模型类型: {config['type']}")
 90 |         
 91 |     def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
 92 |         """生成文本（不支持）"""
 93 |         raise NotImplementedError("EmbeddingModel不支持文本生成")
 94 |         
 95 |     def embed(self, text: str) -> np.ndarray:
 96 |         """获取文本嵌入向量"""
 97 |         logging.info(f"使用模型 {self.model_name} 生成文本嵌入")
 98 |         return self.model.embed(text)
 99 | 
100 | # 导出所有模型类
101 | __all__ = ['BaseModel', 'OutlineModel', 'ContentModel', 'EmbeddingModel'] 


--------------------------------------------------------------------------------
/src/models/base_model.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | import numpy as np
 3 | from typing import Optional, Dict, Any
 4 | 
 5 | class BaseModel(ABC):
 6 |     """AI模型基础接口类"""
 7 |     
 8 |     def __init__(self, config: Dict[str, Any]):
 9 |         self.config = config
10 |         self.api_key = config.get("api_key", "")
11 |         self.model_name = config.get("model_name", "")
12 |         
13 |     @abstractmethod
14 |     def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
15 |         """生成文本"""
16 |         pass
17 |         
18 |     @abstractmethod
19 |     def embed(self, text: str) -> np.ndarray:
20 |         """获取文本嵌入向量"""
21 |         pass
22 |         
23 |     def _validate_config(self) -> bool:
24 |         """验证配置是否有效"""
25 |         if not self.api_key:
26 |             raise ValueError("API key is required")
27 |         if not self.model_name:
28 |             raise ValueError("Model name is required")
29 |         return True
30 |     
31 |     def close(self):
32 |         """关闭模型客户端，子类应该重写此方法"""
33 |         pass 


--------------------------------------------------------------------------------
/src/models/gemini_model.py:
--------------------------------------------------------------------------------
  1 | import google.generativeai as genai
  2 | import numpy as np
  3 | import time
  4 | import logging
  5 | import os
  6 | from typing import Optional, Dict, Any
  7 | from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
  8 | from .base_model import BaseModel
  9 | 
# Optional import of the project's network-management modules.
# NETWORK_AVAILABLE records whether they could be imported.
try:
    from ..network.config import PoolConfig
    from ..network.model_client import ModelHTTPClient, ModelClientFactory
    # NOTE(review): when this import succeeds, TimeoutError and ConnectionError
    # shadow the built-in exceptions of the same name inside this module.
    from ..network.errors import NetworkError, TimeoutError, ConnectionError
    NETWORK_AVAILABLE = True
except ImportError:
    NETWORK_AVAILABLE = False
    # Fall back to the standard HTTP client.
 19 | 
 20 | class GeminiModel(BaseModel):
 21 |     """Gemini模型实现，支持官方和OpenAI兼容API分流"""
 22 |     
    def __init__(self, config: Dict[str, Any]):
        """Read settings from ``config`` and create the client(s) for this model.

        Model names listed in ``gemini_official_models`` use the
        ``google.generativeai`` SDK; every other name is served through an
        OpenAI-compatible client built from ``api_key`` / ``base_url``.

        Args:
            config: Settings dict. Keys read here: model_name, temperature,
                timeout, retry_delay, max_retries, max_input_length, api_key,
                base_url, content_type, fallback_timeout, and the connection
                pool options (max_connections, max_connections_per_host,
                connection_timeout, read_timeout, idle_timeout, enable_http2,
                enable_keepalive).

        Raises:
            ValueError: From ``_validate_config`` when the API key or model
                name is missing.
        """
        super().__init__(config)
        self._validate_config()
        # NOTE(review): BaseModel._validate_config() above rejects an empty model
        # name, so the 'gemini-2.5-flash' default looks unreachable — confirm.
        self.model_name = config.get('model_name', 'gemini-2.5-flash')
        self.temperature = config.get('temperature', 0.7)
        self.timeout = config.get('timeout', 60)
        self.retry_delay = config.get('retry_delay', 30)
        self.max_retries = config.get('max_retries', 5)
        self.max_input_length = config.get('max_input_length', 500000)
        self.api_key = config.get('api_key', None)
        self.base_url = config.get('base_url', None)
        # Decide whether this is an official Gemini model (names with the
        # "models/" prefix are accepted as well).
        gemini_official_models = [
            "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash",
            "models/gemini-2.5-pro", "models/gemini-2.5-flash", "models/gemini-2.0-flash", "models/gemini-2.5-flash-lite"
        ]
        self.is_gemini_official = self.model_name in gemini_official_models
        
        # Fallback model settings. Must run before the network clients below,
        # which read self.fallback_api_key / self.fallback_base_url.
        self._setup_fallback_config()

        # Create the managed network client (only when the network modules were
        # imported, the model is not an official Gemini model, and a base_url is set).
        if NETWORK_AVAILABLE and not self.is_gemini_official and self.base_url:
            # Connection-pool configuration
            pool_config = PoolConfig(
                max_connections=config.get("max_connections", 100),
                max_connections_per_host=config.get("max_connections_per_host", 10),
                connection_timeout=config.get("connection_timeout", 30.0),
                read_timeout=config.get("read_timeout", self.timeout),
                idle_timeout=config.get("idle_timeout", 300.0),
                enable_http2=config.get("enable_http2", True),
                enable_keepalive=config.get("enable_keepalive", True)
            )
            
            # Primary managed client
            self.network_client = ModelClientFactory.create_openai_client(
                base_url=self.base_url,
                api_key=self.api_key,
                pool_config=pool_config,
                timeout=self.timeout
            )
            
            # Fallback managed client, only when a fallback API key is configured
            if self.fallback_api_key:
                self.fallback_network_client = ModelClientFactory.create_openai_client(
                    base_url=self.fallback_base_url,
                    api_key=self.fallback_api_key,
                    pool_config=pool_config,
                    timeout=config.get("fallback_timeout", 180)
                )
            else:
                self.fallback_network_client = None
            
            logging.info(f"Gemini model initialized with network management: {self.base_url}")
        else:
            self.network_client = None
            self.fallback_network_client = None

        # Create the model client.
        # Note: self.model is only assigned on the official path and
        # self.openai_client only on the OpenAI-compatible path.
        if self.is_gemini_official:
            genai.configure(api_key=self.api_key)
            
            # Import the safety-settings manager.
            # NOTE(review): gemini_safety_config is not visible in this review —
            # confirm the module exists in the package.
            from .gemini_safety_config import GeminiSafetyConfig
            
            # Safety settings for the configured content type
            content_type = config.get('content_type', 'creative')
            self.safety_settings = GeminiSafetyConfig.get_safety_settings_for_content_type(content_type)
            
            self.model = genai.GenerativeModel(
                self.model_name,
                safety_settings=self.safety_settings
            )
            logging.info(f"Gemini模型初始化完成，使用{content_type}内容类型的安全设置")
        else:
            # OpenAI-compatible API client (kept as a backup path)
            try:
                from openai import OpenAI
                self.openai_client = OpenAI(
                    api_key=self.api_key,
                    base_url=self.base_url,
                    timeout=self.timeout
                )
            except ImportError:
                self.openai_client = None
                logging.error("OpenAI库未安装，无法使用OpenAI兼容API模型")
109 | 
110 |     def _setup_fallback_config(self):
111 |         """设置备用模型配置"""
112 |         fallback_enabled = self.config.get("fallback_enabled", True)
113 |         if not fallback_enabled:
114 |             self.fallback_api_key = ""
115 |             self.fallback_base_url = ""
116 |             self.fallback_model_name = ""
117 |             logging.info("Gemini模型备用功能已禁用")
118 |             return
119 |         self.fallback_base_url = self.config.get("fallback_base_url", "https://api.siliconflow.cn/v1")
120 |         self.fallback_api_key = self.config.get("fallback_api_key", os.getenv("OPENAI_EMBEDDING_API_KEY", ""))
121 |         fallback_models = self.config.get("fallback_models", {
122 |             "flash": "deepseek-ai/DeepSeek-V3.1",
123 |             "pro": "Qwen/Qwen3-235B-A22B-Thinking-2507", 
124 |             "default": "deepseek-ai/DeepSeek-V3.1"
125 |         })
126 |         if "flash" in self.model_name.lower():
127 |             self.fallback_model_name = fallback_models.get("flash", fallback_models["default"])
128 |         elif "pro" in self.model_name.lower():
129 |             self.fallback_model_name = fallback_models.get("pro", fallback_models["default"])
130 |         else:
131 |             self.fallback_model_name = fallback_models["default"]
132 |         logging.info(f"Gemini模型备用配置: {self.fallback_model_name}")
133 | 
134 |     def _truncate_prompt(self, prompt: str) -> str:
135 |         if len(prompt) <= self.max_input_length:
136 |             return prompt
137 |         logging.warning(f"提示词长度 ({len(prompt)}) 超过限制 ({self.max_input_length})，将进行截断")
138 |         keep_start = int(self.max_input_length * 0.7)
139 |         keep_end = int(self.max_input_length * 0.2)
140 |         truncated = prompt[:keep_start] + "\n\n[内容过长，已截断中间部分...]\n\n" + prompt[-keep_end:]
141 |         logging.info(f"截断后长度: {len(truncated)}")
142 |         return truncated
143 |     
144 |     def _use_network_client_for_generation(self, prompt: str, max_tokens: Optional[int] = None) -> str:
145 |         """使用网络管理客户端进行文本生成（仅用于OpenAI兼容API）"""
146 |         if self.is_gemini_official:
147 |             raise Exception("官方Gemini模型不支持网络管理客户端")
148 |         
149 |         try:
150 |             messages = [{"role": "user", "content": prompt}]
151 |             
152 |             # 使用网络管理客户端
153 |             response_data = self.network_client.chat_completion(
154 |                 model=self.model_name,
155 |                 messages=messages,
156 |                 max_tokens=max_tokens,
157 |                 temperature=self.temperature
158 |             )
159 |             
160 |             content = response_data.get('choices', [{}])[0].get('message', {}).get('content')
161 |             if content is None:
162 |                 raise Exception("模型返回空内容")
163 |                 
164 |             logging.info(f"网络管理客户端生成成功，返回内容长度: {len(content)}")
165 |             return content
166 |             
167 |         except (NetworkError, TimeoutError, ConnectionError) as e:
168 |             logging.error(f"网络管理客户端生成失败: {str(e)}")
169 |             
170 |             # 如果配置了备用网络客户端，尝试使用
171 |             if self.fallback_network_client:
172 |                 logging.warning("尝试使用备用网络客户端...")
173 |                 try:
174 |                     response_data = self.fallback_network_client.chat_completion(
175 |                         model=self.fallback_model_name,
176 |                         messages=messages,
177 |                         max_tokens=max_tokens,
178 |                         temperature=self.temperature
179 |                     )
180 |                     
181 |                     content = response_data.get('choices', [{}])[0].get('message', {}).get('content')
182 |                     if content is None:
183 |                         raise Exception("备用模型返回空内容")
184 |                         
185 |                     logging.info(f"备用网络客户端生成成功，返回内容长度: {len(content)}")
186 |                     return content
187 |                     
188 |                 except Exception as fallback_error:
189 |                     logging.error(f"备用网络客户端也失败了: {str(fallback_error)}")
190 |             
191 |             # 重新抛出原始异常
192 |             raise e
193 |         except Exception as e:
194 |             logging.error(f"网络管理客户端生成出现未知错误: {str(e)}")
195 |             raise e
196 | 
    def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
        """Generate text, routing between the official Gemini SDK and an OpenAI-compatible API.

        The prompt is first shortened by ``_truncate_prompt``.

        Official Gemini path (``self.is_gemini_official``): the SDK model is
        called up to ``self.max_retries`` times. Between attempts the method
        sleeps ``self.retry_delay * (attempt + 1)`` seconds, doubled when the
        error text contains "500" or "internal error". If every attempt fails
        and ``self.fallback_api_key`` is set, one request is made through an
        OpenAI client pointed at ``self.fallback_base_url``.

        OpenAI-compatible path: the managed network client is tried first
        (when available); any failure there falls back to ``self.openai_client``.

        Args:
            prompt: The prompt text.
            max_tokens: Optional output-token limit.

        Returns:
            The generated text.

        Raises:
            Exception: When all attempts (including the fallback) fail, when
                the OpenAI-compatible client was never initialised, or when
                the OpenAI-compatible call fails or returns empty content.
        """
        last_exception = None
        prompt = self._truncate_prompt(prompt)
        if self.is_gemini_official:
            # Official Gemini model call
            for attempt in range(self.max_retries):
                try:
                    logging.info(f"Gemini模型调用 (尝试 {attempt + 1}/{self.max_retries})")
                    generation_config = {"temperature": self.temperature}
                    if max_tokens:
                        generation_config["max_output_tokens"] = max_tokens
                    response = self.model.generate_content(
                        prompt,
                        generation_config=generation_config,
                        request_options={"timeout": self.timeout}
                    )
                    
                    # Process the response directly
                    from .gemini_safety_config import GeminiSafetyConfig
                    
                    # Check that the response is usable
                    if not response or not response.candidates:
                        raise Exception("模型返回空响应或无候选结果")
                    
                    candidate = response.candidates[0]
                    
                    # Record the safety ratings (category name -> probability name)
                    if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
                        safety_ratings = {}
                        for rating in candidate.safety_ratings:
                            safety_ratings[rating.category.name] = rating.probability.name
                        GeminiSafetyConfig.log_safety_ratings(safety_ratings)
                        logging.info(f"安全评级: {safety_ratings}")
                    
                    # Inspect the finish reason
                    finish_reason = candidate.finish_reason.name if hasattr(candidate, 'finish_reason') else 'UNKNOWN'
                    logging.info(f"完成原因: {finish_reason}")
                    
                    # Extract the text: concatenate every non-empty text part
                    if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts'):
                        content_parts = []
                        for part in candidate.content.parts:
                            if hasattr(part, 'text') and part.text:
                                content_parts.append(part.text)
                        
                        if content_parts:
                            content = ''.join(content_parts)
                            logging.info(f"Gemini模型调用成功，内容长度: {len(content)}")
                            return content
                    
                    # No content: build a detailed error message with a hint per finish reason
                    error_msg = f"模型返回空响应 - 完成原因: {finish_reason}"
                    if finish_reason == 'SAFETY':
                        error_msg += "\n建议: 内容可能触发了安全过滤器，请尝试修改提示词或调整安全设置"
                    elif finish_reason == 'MAX_TOKENS':
                        error_msg += "\n建议: 响应长度超过限制，请尝试增加max_tokens参数"
                    elif finish_reason == 'RECITATION':
                        error_msg += "\n建议: 内容可能涉及版权问题，请修改提示词"
                    
                    # Raised inside the try, so an empty response is retried like any other failure
                    raise Exception(error_msg)
                except Exception as e:
                    last_exception = e
                    error_msg = str(e)
                    logging.error(f"Gemini模型调用失败 (尝试 {attempt + 1}/{self.max_retries}): {error_msg}")
                    # Linear backoff, doubled for errors that look like server-side failures
                    if "500" in error_msg or "internal error" in error_msg.lower():
                        delay = self.retry_delay * (attempt + 1) * 2
                    else:
                        delay = self.retry_delay * (attempt + 1)
                    if attempt < self.max_retries - 1:
                        logging.info(f"等待 {delay} 秒后重试...")
                        time.sleep(delay)
                    else:
                        logging.error(f"所有重试都失败了，最后一次错误: {str(e)}")
            # The official model failed on every attempt: try the fallback provider
            if self.fallback_api_key:
                logging.warning("Gemini模型失败，尝试使用备用模型...")
                try:
                    from openai import OpenAI
                    fallback_client = OpenAI(
                        api_key=self.fallback_api_key,
                        base_url=self.fallback_base_url,
                        timeout=self.config.get("fallback_timeout", 180)
                    )
                    logging.info(f"使用备用模型: {self.fallback_model_name}")
                    response = fallback_client.chat.completions.create(
                        model=self.fallback_model_name,
                        messages=[{"role": "user", "content": prompt}],
                        max_tokens=max_tokens,
                        temperature=self.temperature
                    )
                    content = response.choices[0].message.content
                    if content:
                        logging.info(f"备用模型调用成功，返回内容长度: {len(content)}")
                        return content
                    else:
                        raise Exception("备用模型返回空响应")
                except Exception as fallback_error:
                    logging.error(f"备用模型也失败了: {str(fallback_error)}")
                    last_exception = fallback_error
            raise Exception(f"All models failed. Last error: {str(last_exception)}")
        else:
            # OpenAI-compatible API call
            # Prefer the managed network client
            if NETWORK_AVAILABLE and self.network_client:
                try:
                    return self._use_network_client_for_generation(prompt, max_tokens)
                except Exception as e:
                    logging.warning(f"网络管理客户端失败，回退到原始客户端: {str(e)}")
            
            # Fall back to the plain OpenAI client
            if not self.openai_client:
                raise Exception("OpenAI兼容API客户端未初始化，无法调用自定义模型")
            try:
                logging.info(f"直接调用OpenAI兼容API模型: {self.model_name}")
                response = self.openai_client.chat.completions.create(
                    model=self.model_name,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    temperature=self.temperature
                )
                content = response.choices[0].message.content
                if content:
                    logging.info(f"OpenAI兼容API模型调用成功，返回内容长度: {len(content)}")
                    return content
                else:
                    raise Exception("OpenAI兼容API模型返回空响应")
            except Exception as e:
                logging.error(f"OpenAI兼容API模型调用失败: {str(e)}")
                raise
327 | 
328 |     def embed(self, text: str) -> np.ndarray:
329 |         raise NotImplementedError("Embedding is not supported in Gemini model yet")
330 |     
331 |     def close(self):
332 |         """关闭模型客户端"""
333 |         if NETWORK_AVAILABLE:
334 |             if self.network_client:
335 |                 self.network_client.close()
336 |             if self.fallback_network_client:
337 |                 self.fallback_network_client.close()
338 |         logging.debug("Gemini model clients closed")
339 |     
340 |     def __del__(self):
341 |         """析构函数，确保资源清理"""
342 |         try:
343 |             self.close()
344 |         except:
345 |             pass 


--------------------------------------------------------------------------------
/src/models/openai_model.py:
--------------------------------------------------------------------------------
  1 | from openai import OpenAI
  2 | import numpy as np
  3 | from typing import Optional, Dict, Any
  4 | from tenacity import retry, stop_after_attempt, wait_fixed, wait_exponential
  5 | from .base_model import BaseModel
  6 | import logging
  7 | import json
  8 | import time
  9 | import os
 10 | 
 11 | # 导入网络管理相关模块
 12 | try:
 13 |     from ..network.config import PoolConfig
 14 |     from ..network.model_client import ModelHTTPClient, ModelClientFactory
 15 |     from ..network.errors import NetworkError, TimeoutError, ConnectionError
 16 |     NETWORK_AVAILABLE = True
 17 | except ImportError:
 18 |     NETWORK_AVAILABLE = False
 19 |     # 使用标准HTTP客户端
 20 | 
 21 | class OpenAIModel(BaseModel):
 22 |     """OpenAI模型实现"""
 23 |     
 24 |     def __init__(self, config: Dict[str, Any]):
 25 |         super().__init__(config)
 26 |         self._validate_config()
 27 |         
 28 |         # 检查是否为火山引擎配置
 29 |         if config.get("type") == "volcengine":
 30 |             self.is_volcengine = True
 31 |             self.thinking_enabled = config.get("thinking_enabled", True)
 32 |             self._init_volcengine_client(config)
 33 |         else:
 34 |             self.is_volcengine = False
 35 |             self.thinking_enabled = False
 36 |             self._init_standard_client(config)
 37 |             
 38 |     def _init_volcengine_client(self, config: Dict[str, Any]):
 39 |         """初始化火山引擎客户端"""
 40 |         timeout = config.get("timeout", 300)
 41 |         base_url = config.get("base_url")
 42 |         
 43 |         # 备用API配置
 44 |         self.fallback_enabled = config.get("fallback_enabled", False)
 45 |         if self.fallback_enabled:
 46 |             self.fallback_base_url = config.get("fallback_base_url", "https://api.siliconflow.cn/v1")
 47 |             self.fallback_api_key = config.get("fallback_api_key", "")
 48 |             self.fallback_model_name = config.get("fallback_model_name", "deepseek-ai/DeepSeek-V3.1")
 49 |         
 50 |         # 初始化火山引擎客户端
 51 |         self.volcengine_client = OpenAI(
 52 |             api_key=config["api_key"],
 53 |             base_url=base_url,
 54 |             timeout=timeout
 55 |         )
 56 |         
 57 |         logging.info(f"火山引擎DeepSeek-V3.1模型初始化完成: {base_url}, 深度思考: {self.thinking_enabled}")
 58 |         
 59 |     def _init_standard_client(self, config: Dict[str, Any]):
 60 |         """初始化标准OpenAI客户端（原有逻辑）"""
 61 |         # 增加超时时间，特别是对于本地服务器
 62 |         timeout = config.get("timeout", 120)  # 默认120秒
 63 |         base_url = config.get("base_url", "https://api.siliconflow.cn/v1")
 64 |         
 65 |         # 备用API配置
 66 |         self.fallback_base_url = "https://api.siliconflow.cn/v1"
 67 |         self.fallback_api_key = os.getenv("OPENAI_EMBEDDING_API_KEY", "")  # 使用embedding的API key作为备用
 68 |         # 根据当前模型类型选择备用模型
 69 |         if "gemini-2.5-flash" in self.model_name:
 70 |             self.fallback_model_name = "moonshotai/Kimi-K2-Instruct"  # 使用Kimi-K2作为gemini-2.5-flash的备用
 71 |         elif "gemini-2.5-pro" in self.model_name:
 72 |             self.fallback_model_name = "Qwen/Qwen3-235B-A22B-Thinking-2507"  # 使用Qwen作为gemini-2.5-pro的备用
 73 |         else:
 74 |             self.fallback_model_name = "deepseek-ai/DeepSeek-V3.1"  # 默认备用模型
 75 |         
 76 |         # 初始化网络管理客户端（如果可用）
 77 |         if NETWORK_AVAILABLE:
 78 |             # 创建连接池配置
 79 |             pool_config = PoolConfig(
 80 |                 max_connections=config.get("max_connections", 100),
 81 |                 max_connections_per_host=config.get("max_connections_per_host", 10),
 82 |                 connection_timeout=config.get("connection_timeout", 30.0),
 83 |                 read_timeout=config.get("read_timeout", timeout),
 84 |                 idle_timeout=config.get("idle_timeout", 300.0),
 85 |                 enable_http2=config.get("enable_http2", True),
 86 |                 enable_keepalive=config.get("enable_keepalive", True)
 87 |             )
 88 |             
 89 |             # 创建网络管理客户端
 90 |             self.network_client = ModelClientFactory.create_openai_client(
 91 |                 base_url=base_url,
 92 |                 api_key=config["api_key"],
 93 |                 pool_config=pool_config,
 94 |                 timeout=timeout
 95 |             )
 96 |             
 97 |             # 创建备用客户端
 98 |             if self.fallback_api_key:
 99 |                 self.fallback_network_client = ModelClientFactory.create_openai_client(
100 |                     base_url=self.fallback_base_url,
101 |                     api_key=self.fallback_api_key,
102 |                     pool_config=pool_config,
103 |                     timeout=180  # 备用API使用更长的超时时间
104 |                 )
105 |             else:
106 |                 self.fallback_network_client = None
107 |             
108 |             logging.info(f"OpenAI model initialized with network management: {base_url}, timeout: {timeout}s")
109 |         else:
110 |             # 回退到原始OpenAI客户端
111 |             self.network_client = None
112 |             self.fallback_network_client = None
113 |             
114 |         # 保持原始客户端作为备用
115 |         self.client = OpenAI(
116 |             api_key=config["api_key"],
117 |             base_url=base_url,
118 |             timeout=timeout
119 |         )
120 |         logging.info(f"OpenAI model initialized with base URL: {base_url}, timeout: {timeout}s")
121 |         
122 |     def _build_volcengine_messages(self, prompt: str) -> list:
123 |         """构建火山引擎消息格式"""
124 |         messages = [{"role": "user", "content": prompt}]
125 |         
126 |         if self.thinking_enabled:
127 |             # 添加深度思考指令
128 |             thinking_instruction = """
129 | 请使用深度思考模式来回答这个问题。在回答之前，请在<thinking>标签中详细分析问题，
130 | 考虑多个角度和可能的解决方案，然后给出最终的回答。
131 | """
132 |             messages[0]["content"] = thinking_instruction + "\n\n" + prompt
133 |         
134 |         return messages
135 |     
136 |     def _process_thinking_output(self, content: str) -> str:
137 |         """处理包含思考过程的输出"""
138 |         # 提取思考过程和最终答案
139 |         if "<thinking>" in content and "</thinking>" in content:
140 |             # 记录思考过程用于调试
141 |             thinking_start = content.find("<thinking>")
142 |             thinking_end = content.find("</thinking>") + len("</thinking>")
143 |             thinking_process = content[thinking_start:thinking_end]
144 |             
145 |             logging.debug(f"深度思考过程: {thinking_process[:500]}...")
146 |             
147 |             # 返回思考标签后的内容作为最终答案
148 |             final_answer = content[thinking_end:].strip()
149 |             if final_answer:
150 |                 return final_answer
151 |             else:
152 |                 # 如果没有思考标签后的内容，返回整个内容
153 |                 return content
154 |         
155 |         return content
156 |     
157 |     def _create_fallback_client(self):
158 |         """创建备用客户端"""
159 |         if self.fallback_api_key:
160 |             logging.warning(f"切换到备用API: {self.fallback_base_url}, 模型: {self.fallback_model_name}")
161 |             return OpenAI(
162 |                 api_key=self.fallback_api_key,
163 |                 base_url=self.fallback_base_url,
164 |                 timeout=180  # 备用API使用更长的超时时间
165 |             )
166 |         return None
167 |     
168 |     def _generate_with_volcengine(self, prompt: str, max_tokens: Optional[int] = None) -> str:
169 |         """使用火山引擎DeepSeek-V3.1生成文本"""
170 |         logging.info(f"使用火山引擎DeepSeek-V3.1生成文本，提示词长度: {len(prompt)}")
171 |         
172 |         # 构建消息
173 |         messages = self._build_volcengine_messages(prompt)
174 |         
175 |         # 火山引擎 DeepSeek-V3.1 的 max_tokens 限制为 32768
176 |         effective_max_tokens = max_tokens or self.config.get("max_tokens", 8192)
177 |         if effective_max_tokens > 32768:
178 |             logging.warning(f"max_tokens {effective_max_tokens} 超过火山引擎限制，调整为 32768")
179 |             effective_max_tokens = 32768
180 |         
181 |         # 设置生成参数
182 |         generation_params = {
183 |             "model": self.model_name,
184 |             "messages": messages,
185 |             "temperature": self.config.get("temperature", 0.7),
186 |             "max_tokens": effective_max_tokens
187 |         }
188 |         
189 |         try:
190 |             response = self.volcengine_client.chat.completions.create(**generation_params)
191 |             content = response.choices[0].message.content
192 |             
193 |             if content is None:
194 |                 raise Exception("火山引擎模型返回空内容")
195 |             
196 |             # 处理深度思考输出
197 |             if self.thinking_enabled:
198 |                 content = self._process_thinking_output(content)
199 |             
200 |             logging.info(f"火山引擎生成成功，返回内容长度: {len(content)}")
201 |             return content
202 |             
203 |         except Exception as e:
204 |             logging.error(f"火山引擎生成失败: {str(e)}")
205 |             
206 |             # 尝试使用备用模型
207 |             if self.fallback_enabled and self.fallback_api_key:
208 |                 return self._generate_with_fallback(prompt, max_tokens)
209 |             
210 |             raise e
211 |     
212 |     def _generate_with_fallback(self, prompt: str, max_tokens: Optional[int] = None) -> str:
213 |         """使用备用模型生成文本"""
214 |         logging.warning(f"切换到备用模型: {self.fallback_model_name}")
215 |         
216 |         fallback_client = OpenAI(
217 |             api_key=self.fallback_api_key,
218 |             base_url=self.fallback_base_url,
219 |             timeout=180
220 |         )
221 |         
222 |         try:
223 |             response = fallback_client.chat.completions.create(
224 |                 model=self.fallback_model_name,
225 |                 messages=[{"role": "user", "content": prompt}],
226 |                 max_tokens=max_tokens or 8192,
227 |                 temperature=0.7
228 |             )
229 |             
230 |             content = response.choices[0].message.content
231 |             if content is None:
232 |                 raise Exception("备用模型返回空内容")
233 |                 
234 |             logging.info(f"备用模型生成成功，返回内容长度: {len(content)}")
235 |             return content
236 |             
237 |         except Exception as fallback_error:
238 |             logging.error(f"备用模型也失败了: {str(fallback_error)}")
239 |             raise fallback_error
240 |     
241 |     def _use_network_client_for_generation(self, prompt: str, max_tokens: Optional[int] = None) -> str:
242 |         """使用网络管理客户端进行文本生成"""
243 |         try:
244 |             # 如果提示词太长，进行截断
245 |             max_prompt_length = 65536  # 设置最大提示词长度
246 |             if len(prompt) > max_prompt_length:
247 |                 logging.warning(f"提示词过长 ({len(prompt)} 字符)，截断到 {max_prompt_length} 字符")
248 |                 prompt = prompt[:max_prompt_length]
249 |             
250 |             messages = [{"role": "user", "content": prompt}]
251 |             
252 |             # 使用网络管理客户端
253 |             response_data = self.network_client.chat_completion(
254 |                 model=self.model_name,
255 |                 messages=messages,
256 |                 max_tokens=max_tokens,
257 |                 temperature=0.7
258 |             )
259 |             
260 |             content = response_data.get('choices', [{}])[0].get('message', {}).get('content')
261 |             if content is None:
262 |                 raise Exception("模型返回空内容")
263 |                 
264 |             logging.info(f"网络管理客户端生成成功，返回内容长度: {len(content)}")
265 |             return content
266 |             
267 |         except (NetworkError, TimeoutError, ConnectionError) as e:
268 |             logging.error(f"网络管理客户端生成失败: {str(e)}")
269 |             
270 |             # 如果配置了备用网络客户端，尝试使用
271 |             if self.fallback_network_client:
272 |                 logging.warning("尝试使用备用网络客户端...")
273 |                 try:
274 |                     response_data = self.fallback_network_client.chat_completion(
275 |                         model=self.fallback_model_name,
276 |                         messages=messages,
277 |                         max_tokens=max_tokens,
278 |                         temperature=0.7
279 |                     )
280 |                     
281 |                     content = response_data.get('choices', [{}])[0].get('message', {}).get('content')
282 |                     if content is None:
283 |                         raise Exception("备用模型返回空内容")
284 |                         
285 |                     logging.info(f"备用网络客户端生成成功，返回内容长度: {len(content)}")
286 |                     return content
287 |                     
288 |                 except Exception as fallback_error:
289 |                     logging.error(f"备用网络客户端也失败了: {str(fallback_error)}")
290 |             
291 |             # 重新抛出原始异常
292 |             raise e
293 |         except Exception as e:
294 |             logging.error(f"网络管理客户端生成出现未知错误: {str(e)}")
295 |             raise e
296 |     
297 |     def _use_network_client_for_embedding(self, text: str) -> np.ndarray:
298 |         """使用网络管理客户端获取嵌入向量"""
299 |         try:
300 |             logging.info(f"使用网络管理客户端生成嵌入向量，文本长度: {len(text)}")
301 |             
302 |             response_data = self.network_client.embeddings(
303 |                 model=self.model_name,
304 |                 input_text=text
305 |             )
306 |             
307 |             # 解析响应
308 |             if 'data' in response_data and len(response_data['data']) > 0:
309 |                 embedding = np.array(response_data['data'][0]['embedding'])
310 |                 logging.info(f"网络管理客户端成功生成嵌入向量，维度: {len(embedding)}")
311 |                 return embedding
312 |             else:
313 |                 logging.error("嵌入响应数据为空或无效")
314 |                 raise Exception("嵌入响应数据为空或无效")
315 |                 
316 |         except (NetworkError, TimeoutError, ConnectionError) as e:
317 |             logging.error(f"网络管理客户端嵌入失败: {str(e)}")
318 |             raise e
319 |         except Exception as e:
320 |             logging.error(f"网络管理客户端嵌入出现未知错误: {str(e)}")
321 |             raise e
322 |         
323 |     @retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=60))
324 |     def generate(self, prompt: str, max_tokens: Optional[int] = None) -> str:
325 |         """生成文本"""
326 |         logging.info(f"开始生成文本，模型: {self.model_name}, 提示词长度: {len(prompt)}")
327 |         
328 |         # 如果是火山引擎，使用专用的生成方法
329 |         if self.is_volcengine:
330 |             return self._generate_with_volcengine(prompt, max_tokens)
331 |         
332 |         # 优先使用网络管理客户端
333 |         if NETWORK_AVAILABLE and self.network_client:
334 |             try:
335 |                 return self._use_network_client_for_generation(prompt, max_tokens)
336 |             except Exception as e:
337 |                 logging.warning(f"网络管理客户端失败，回退到原始客户端: {str(e)}")
338 |         
339 |         # 回退到原始实现
340 |         try:
341 |             # 如果提示词太长，进行截断
342 |             max_prompt_length = 65536  # 设置最大提示词长度
343 |             if len(prompt) > max_prompt_length:
344 |                 logging.warning(f"提示词过长 ({len(prompt)} 字符)，截断到 {max_prompt_length} 字符")
345 |                 prompt = prompt[:max_prompt_length]
346 |             
347 |             response = self.client.chat.completions.create(
348 |                 model=self.model_name,
349 |                 messages=[{"role": "user", "content": prompt}],
350 |                 max_tokens=max_tokens,
351 |                 temperature=0.7
352 |             )
353 |             
354 |             content = response.choices[0].message.content
355 |             if content is None:
356 |                 raise Exception("模型返回空内容")
357 |                 
358 |             logging.info(f"文本生成成功，返回内容长度: {len(content)}")
359 |             return content
360 |             
361 |         except Exception as e:
362 |             logging.error(f"OpenAI generation error: {str(e)}")
363 |             
364 |             # 如果是连接错误且配置了备用API，尝试使用备用API
365 |             if ("timeout" in str(e).lower() or "connection" in str(e).lower()) and self.fallback_api_key:
366 |                 logging.warning("检测到连接错误，尝试使用备用API...")
367 |                 fallback_client = self._create_fallback_client()
368 |                 if fallback_client:
369 |                     try:
370 |                         response = fallback_client.chat.completions.create(
371 |                             model=self.fallback_model_name,  # 使用备用模型名称
372 |                             messages=[{"role": "user", "content": prompt}],
373 |                             max_tokens=max_tokens,
374 |                             temperature=0.7
375 |                         )
376 |                         content = response.choices[0].message.content
377 |                         if content is None:
378 |                             raise Exception("备用模型返回空内容")
379 |                             
380 |                         logging.info(f"使用备用API生成成功，返回内容长度: {len(content)}")
381 |                         return content
382 |                     except Exception as fallback_error:
383 |                         logging.error(f"备用API也失败了: {str(fallback_error)}")
384 |             
385 |             if "timeout" in str(e).lower() or "connection" in str(e).lower():
386 |                 logging.warning("检测到超时或连接错误，将重试...")
387 |                 time.sleep(5)  # 等待5秒后重试
388 |             raise Exception(f"OpenAI generation error: {str(e)}")
389 |             
390 |     @retry(stop=stop_after_attempt(3), wait=wait_fixed(10))
391 |     def embed(self, text: str) -> np.ndarray:
392 |         """获取文本嵌入向量"""
393 |         logging.info(f"生成嵌入向量，文本长度: {len(text)}")
394 |         logging.info(f"使用模型: {self.model_name}")
395 |         
396 |         # 优先使用网络管理客户端
397 |         if NETWORK_AVAILABLE and self.network_client:
398 |             try:
399 |                 return self._use_network_client_for_embedding(text)
400 |             except Exception as e:
401 |                 logging.warning(f"网络管理客户端嵌入失败，回退到原始客户端: {str(e)}")
402 |         
403 |         # 回退到原始实现
404 |         try:
405 |             # 打印请求信息
406 |             request_data = {
407 |                 "model": self.model_name,
408 |                 "input": text[:100] + "..." if len(text) > 100 else text  # 只打印前100个字符
409 |             }
410 |             logging.info(f"Request data: {json.dumps(request_data, ensure_ascii=False)}")
411 |             
412 |             try:
413 |                 response = self.client.embeddings.create(
414 |                     model=self.model_name,
415 |                     input=text
416 |                 )
417 |                 
418 |                 # 打印响应信息
419 |                 if hasattr(response, 'data') and len(response.data) > 0:
420 |                     embedding = np.array(response.data[0].embedding)
421 |                     logging.info(f"Successfully generated embedding with dimension {len(embedding)}")
422 |                     return embedding
423 |                 else:
424 |                     logging.error("Response data is empty or invalid")
425 |                     logging.error(f"Response: {response}")
426 |                     raise Exception("Embedding response is empty or invalid")
427 |                     
428 |             except Exception as api_error:
429 |                 logging.error(f"API call failed: {str(api_error)}")
430 |                 # 检查是否有response属性（OpenAI API错误通常有）
431 |                 if hasattr(api_error, 'response') and api_error.response is not None:
432 |                     logging.error(f"Response status: {api_error.response.status_code}")
433 |                     logging.error(f"Response body: {api_error.response.text}")
434 |                 raise
435 |                 
436 |         except Exception as e:
437 |             logging.error(f"OpenAI embedding error: {str(e)}")
438 |             raise
439 |     
440 |     def close(self):
441 |         """关闭模型客户端"""
442 |         if NETWORK_AVAILABLE:
443 |             if self.network_client:
444 |                 self.network_client.close()
445 |             if self.fallback_network_client:
446 |                 self.fallback_network_client.close()
447 |         logging.debug("OpenAI model clients closed")
448 |     
449 |     def __del__(self):
450 |         """析构函数，确保资源清理"""
451 |         try:
452 |             self.close()
453 |         except:
454 |             pass 


--------------------------------------------------------------------------------
/src/tools/generate_config.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import sys
  4 | import google.generativeai as genai
  5 | from google.api_core import exceptions as google_exceptions
  6 | from google.generativeai.types import HarmCategory, HarmBlockThreshold
  7 | 
  8 | # 导入您项目中的 AIConfig
  9 | # 假设脚本在 src/tools/ 目录下，需要调整路径以正确导入
 10 | # 如果从项目根目录运行，需要将 src 添加到 sys.path 或使用相对导入
 11 | try:
 12 |     # 尝试相对导入（如果脚本在 src/tools/ 并且从 src/ 运行）
 13 |     from ..config.ai_config import AIConfig
 14 | except (ImportError, ValueError):
 15 |     # 如果相对导入失败，尝试添加到 sys.path（假设从项目根目录运行）
 16 |     # 获取项目根目录（假设此脚本位于 src/tools/）
 17 |     project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 18 |     if project_root not in sys.path:
 19 |         sys.path.insert(0, project_root)
 20 |     try:
 21 |          from src.config.ai_config import AIConfig
 22 |     except ImportError as e:
 23 |         print(f"错误: 无法导入 AIConfig。请确保 PYTHONPATH 设置正确或从项目根目录运行。 {e}")
 24 |         sys.exit(1)
 25 | 
 26 | 
 27 | # --- LLM Configuration (Now fetched from AIConfig) ---
 28 | # LLM_MODEL variable is no longer needed here
 29 | 
 30 | # --- Safety Settings for Gemini ---
 31 | # 您可以根据需要调整这些安全设置
 32 | SAFETY_SETTINGS = {
 33 |     HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 34 |     HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 35 |     HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 36 |     HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 37 | }
 38 | 
 39 | 
 40 | def construct_llm_prompt(theme, config_structure_json):
 41 |     """构建用于填充 novel_config 的 LLM 提示词 (保持不变)"""
 42 |     try:
 43 |         config_structure = json.loads(config_structure_json)
 44 |         if 'theme' in config_structure:
 45 |             config_structure['theme'] = theme
 46 |         else:
 47 |             print("警告: novel_config 结构中未找到 'theme' 字段，可能导致 LLM 理解偏差。")
 48 |         formatted_structure_json = json.dumps(config_structure, ensure_ascii=False, indent=2)
 49 |     except json.JSONDecodeError:
 50 |         print("错误: 提供的 config_structure_json 不是有效的 JSON。")
 51 |         formatted_structure_json = config_structure_json
 52 | 
 53 |     prompt = f"""
 54 | 您是一个富有创造力的小说设定助手。
 55 | 根据以下提供的小说【主题】:
 56 | "{theme}"
 57 | 
 58 | 请详细填充下面的 JSON 结构中的 `novel_config` 部分。请将所有 "示例" 值、描述性文字（如 "示例力量体系或核心设定"）替换为与【主题】紧密相关、具体且有创意的设定。
 59 | 请确保输出是一个【完整且有效】的 JSON 对象，该对象代表填充后的 `novel_config` 的【值】（即，不包含最外层的 "novel_config": {{...}}，只输出大括号内的内容）。
 60 | 不要添加任何额外的解释或注释，只返回纯粹的 JSON 对象值。
 61 | 请严格使用双引号包裹所有的键和字符串值，确保输出是标准的 JSON 格式。
 62 | 
 63 | 模板结构如下:
 64 | ```json
 65 | {formatted_structure_json}
 66 | ```
 67 | 
 68 | 请严格按照上述 JSON 结构进行填充，并确保所有 "示例" 或占位符文本都被替换。返回填充后的 JSON 对象值。
 69 | """
 70 |     return prompt
 71 | 
 72 | def call_llm_to_fill_config(theme, novel_config_template):
 73 |     """调用配置好的 Gemini 模型填充 novel_config"""
 74 |     try:
 75 |         # 1. 初始化 AIConfig
 76 |         ai_config = AIConfig()
 77 | 
 78 |         # 2. 获取 Gemini Outline 模型配置
 79 |         gemini_outline_config = ai_config.get_gemini_config("outline")
 80 |         api_key = gemini_outline_config.get("api_key")
 81 |         model_name = gemini_outline_config.get("model_name")
 82 |         temperature = gemini_outline_config.get("temperature", 1.0) # 使用配置的 temperature
 83 | 
 84 |         if not api_key:
 85 |             print("错误: 未设置GEMINI_API_KEY环境变量或配置无效。")
 86 |             return None
 87 |         if not model_name:
 88 |              print("错误: 从 AIConfig 获取的 Gemini outline 模型名称为空。")
 89 |              return None
 90 | 
 91 |         # 3. 配置 Gemini 客户端
 92 |         genai.configure(api_key=api_key)
 93 | 
 94 |         # 4. 准备模型和生成配置
 95 |         generation_config = genai.GenerationConfig(
 96 |             temperature=temperature,
 97 |             # response_mime_type="application/json" # Gemini Flash 可能不支持强制 JSON 输出，依赖提示词
 98 |         )
 99 |         model = genai.GenerativeModel(
100 |             model_name=model_name,
101 |             generation_config=generation_config,
102 |             safety_settings=SAFETY_SETTINGS # 应用安全设置
103 |         )
104 | 
105 |         # 5. 构建提示
106 |         try:
107 |             template_json = json.dumps(novel_config_template, ensure_ascii=False, indent=2)
108 |         except TypeError:
109 |             print("错误: novel_config_template 无法序列化为 JSON。")
110 |             return None
111 |         prompt = construct_llm_prompt(theme, template_json)
112 |         print(f"\n正在调用 Gemini模型 ({model_name}) 生成详细配置，请稍候...")
113 |         # print("\n--- Prompt ---")
114 |         # print(prompt)
115 |         # print("--- End Prompt ---")
116 | 
117 | 
118 |         # 6. 调用 Gemini API
119 |         response = model.generate_content(prompt)
120 | 
121 |         # 7. 处理和解析响应
122 |         try:
123 |             # 检查是否有候选内容以及内容部分
124 |             if not response.candidates or not response.candidates[0].content or not response.candidates[0].content.parts:
125 |                  # 检查是否因为安全或其他原因被阻止
126 |                  if response.prompt_feedback.block_reason:
127 |                      print(f"错误: Gemini 请求被阻止，原因: {response.prompt_feedback.block_reason}")
128 |                      if response.prompt_feedback.safety_ratings:
129 |                          print("安全评分详情:")
130 |                          for rating in response.prompt_feedback.safety_ratings:
131 |                              print(f"- {rating.category}: {rating.probability}")
132 |                  else:
133 |                      print("错误: Gemini 返回的响应无效或为空。")
134 |                      print("原始响应:", response)
135 |                  return None
136 | 
137 |             # 提取文本内容
138 |             generated_text = response.text
139 |             # print("\n--- Raw LLM Response ---")
140 |             # print(generated_text)
141 |             # print("--- End Raw LLM Response ---")
142 | 
143 |             # 尝试去除可能的 Markdown 代码块标记
144 |             if generated_text.strip().startswith("```json"):
145 |                 generated_text = generated_text.strip()[7:]
146 |             if generated_text.strip().endswith("```"):
147 |                 generated_text = generated_text.strip()[:-3]
148 |             generated_text = generated_text.strip() # 去除首尾空白
149 | 
150 |             # 解析 JSON
151 |             filled_novel_config = json.loads(generated_text)
152 | 
153 |             if not isinstance(filled_novel_config, dict):
154 |                 print("错误: LLM 返回的不是有效的 JSON 对象。")
155 |                 print("解析后内容:", filled_novel_config)
156 |                 return None
157 | 
158 |             # 确保主题被正确保留或填充
159 |             if 'theme' not in filled_novel_config or not filled_novel_config['theme']:
160 |                  print("警告: LLM 返回的配置缺少 'theme'，将使用用户输入的主题。")
161 |                  filled_novel_config['theme'] = theme
162 |             elif filled_novel_config['theme'] != theme:
163 |                  print(f"提示: LLM 修改了主题，已修正为用户输入的主题: '{theme}'")
164 |                  filled_novel_config['theme'] = theme
165 | 
166 |             print("LLM 配置生成成功并已解析。")
167 |             return filled_novel_config
168 | 
169 |         except json.JSONDecodeError as e:
170 |             print(f"错误: 解析 LLM 返回内容失败，不是有效的 JSON: {e}")
171 |             print("原始返回文本:", generated_text) # 打印清理后的文本
172 |             return None
173 |         except AttributeError as e:
174 |              print(f"错误: 处理 Gemini 响应时出错: {e}. 可能响应结构不符合预期。")
175 |              print("原始响应:", response)
176 |              return None
177 |         except google_exceptions.GoogleAPIError as e:
178 |              print(f"错误: 调用 Gemini API 时发生 Google API 错误: {e}")
179 |              return None
180 |         except Exception as e:
181 |              print(f"解析或处理 LLM 响应时发生未知错误: {e}")
182 |              print("原始响应:", response)
183 |              return None
184 | 
185 | 
186 |     except ValueError as e: # 来自 AIConfig 的错误
187 |         print(f"错误: AI 配置无效: {e}")
188 |     except ImportError:
189 |          # AIConfig 导入失败的错误已在模块级别处理
190 |          pass
191 |     except Exception as e:
192 |         print(f"调用 LLM 或处理配置时发生未知错误: {e}")
193 | 
194 |     return None
195 | 
196 | 
def generate_config_from_theme(theme_input, template_path="config.json.example", output_path="config.json"):
    """Create ``config.json`` from the template and the user's theme.

    The template is loaded, its ``novel_config`` section is sent to the
    configured Gemini model via ``call_llm_to_fill_config``, and the result is
    written to ``output_path``. When the model yields nothing usable, only the
    theme is updated and the remaining template values are kept.

    Args:
        theme_input: Novel theme entered by the user.
        template_path: Template file; a relative path is resolved against the
            project root (two levels above this script's directory).
        output_path: Destination file; resolved the same way.

    Returns:
        None. Progress and errors are printed.
    """
    try:
        # Relative paths are anchored at the project root.
        tools_dir = os.path.dirname(os.path.abspath(__file__))
        root_dir = os.path.dirname(os.path.dirname(tools_dir))
        template_path = template_path if os.path.isabs(template_path) else os.path.join(root_dir, template_path)
        output_path = output_path if os.path.isabs(output_path) else os.path.join(root_dir, output_path)

        if not os.path.exists(template_path):
            print(f"错误: 模板文件 '{template_path}' 未找到。")
            return

        with open(template_path, 'r', encoding='utf-8') as template_file:
            config_data = json.load(template_file)

        # Fall back to a minimal skeleton when the template has no usable section.
        has_valid_section = 'novel_config' in config_data and isinstance(config_data['novel_config'], dict)
        if not has_valid_section:
            print(f"错误: 模板文件 '{template_path}' 中未找到有效的 'novel_config' 部分或其格式不正确。")
            config_data['novel_config'] = {"theme": theme_input, "type": "待填充", "style": "待填充"}
            print("已创建基本的 novel_config 结构。")

        section_template = config_data['novel_config']
        section_template['theme'] = theme_input

        # Let the LLM fill in the novel_config section.
        llm_result = call_llm_to_fill_config(theme_input, section_template)

        if not llm_result:
            print("\n未能从 LLM 获取有效的配置。将仅更新主题，其余保留模板值。")
            config_data['novel_config']['theme'] = theme_input
        else:
            config_data['novel_config'] = llm_result
            print("\n使用 LLM 生成的内容更新了 'novel_config'。")

        with open(output_path, 'w', encoding='utf-8') as output_file:
            json.dump(config_data, output_file, ensure_ascii=False, indent=2)

        print(f"\n成功生成配置文件 '{output_path}'。")
        if llm_result:
            print("请检查生成的 novel_config 内容是否符合预期，并根据需要进行调整。")
        print(f"文件中的其他顶级配置项（如路径）仍来自模板 '{template_path}'，")
        print(f"请手动编辑 '{output_path}' 以设置实际值。")

    except json.JSONDecodeError:
        print(f"错误: 模板文件 '{template_path}' 包含无效的 JSON。")
    except OSError as e:
        print(f"错误: 读写文件时发生错误: {e}")
    except Exception as e:
        print(f"生成配置文件时发生未知错误: {e}")
258 | 
if __name__ == "__main__":
    # Locate files relative to the project root (three levels above this file).
    project_root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    default_template_file = os.path.join(project_root_dir, "config.json.example")
    default_output_file = os.path.join(project_root_dir, "config.json")

    # The template must exist before anything else is attempted.
    if not os.path.exists(default_template_file):
        print(f"错误: 模板文件 '{default_template_file}' 未找到。")
        print("请确保 'config.json.example' 文件位于项目根目录下。")
        sys.exit(1)

    # Advisory check only: AIConfig does the real handling of the .env file.
    env_path = os.path.join(project_root_dir, ".env")
    if not os.path.exists(env_path):
        print("警告: 未找到 .env 文件。请确保已创建 .env 文件并设置了所需的 API 密钥。")
    else:
        # Read as UTF-8 explicitly: the .env file may contain non-ASCII
        # comments, and the locale default encoding could fail to decode
        # them. Undecodable bytes are replaced so this hint never aborts
        # the script.
        with open(env_path, 'r', encoding='utf-8', errors='replace') as f:
            if 'GEMINI_API_KEY' not in f.read():
                print("警告: .env 文件中缺少必要的API密钥配置。请确保已正确设置。")

    print(f"将使用模板 '{os.path.basename(default_template_file)}' 和 AIConfig 中的 Gemini 模型生成 '{os.path.basename(default_output_file)}'。")
    # Strip surrounding whitespace so a blank-only entry counts as "no theme".
    user_theme = input("请输入您的小说主题: ").strip()

    if user_theme:
        # Pass absolute paths to the generator.
        generate_config_from_theme(user_theme, default_template_file, default_output_file)
    else:
        print("未输入主题，操作已取消。")
290 | 


--------------------------------------------------------------------------------
/src/tools/generate_marketing.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import argparse
  4 | import json
  5 | import logging
  6 | 
  7 | # 添加项目根目录到 Python 路径
  8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
  9 | 
 10 | from src.config.config import Config
 11 | from src.models.gemini_model import GeminiModel
 12 | from src.models.openai_model import OpenAIModel
 13 | from src.generators.title_generator import TitleGenerator
 14 | 
 15 | def setup_logging():
 16 |     """设置日志"""
 17 |     # 获取项目根目录
 18 |     base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 19 |     # 定义日志目录路径
 20 |     log_dir = os.path.join(base_dir, "data", "logs")
 21 |     # 确保日志目录存在
 22 |     os.makedirs(log_dir, exist_ok=True)
 23 |     # 定义日志文件完整路径
 24 |     log_file_path = os.path.join(log_dir, "marketing_generation.log")
 25 | 
 26 |     logging.basicConfig(
 27 |         level=logging.INFO,
 28 |         format='%(asctime)s - %(levelname)s - %(message)s',
 29 |         handlers=[
 30 |             logging.StreamHandler(),
 31 |             # 使用完整的日志文件路径
 32 |             logging.FileHandler(log_file_path, encoding='utf-8')
 33 |         ]
 34 |     )
 35 | 
 36 | def create_model(model_config):
 37 |     """创建AI模型实例"""
 38 |     logging.info(f"正在创建模型: {model_config['type']} - {model_config['model_name']}")
 39 |     if model_config["type"] == "gemini":
 40 |         return GeminiModel(model_config)
 41 |     elif model_config["type"] == "openai":
 42 |         return OpenAIModel(model_config)
 43 |     elif model_config["type"] == "volcengine":
 44 |         return OpenAIModel(model_config)  # 火山引擎复用OpenAI兼容实现
 45 |     else:
 46 |         raise ValueError(f"不支持的模型类型: {model_config['type']}")
 47 | 
 48 | def load_chapter_summaries(summary_file):
 49 |     """加载章节摘要"""
 50 |     if not os.path.exists(summary_file):
 51 |         logging.warning(f"摘要文件不存在: {summary_file}")
 52 |         return []
 53 |         
 54 |     try:
 55 |         with open(summary_file, 'r', encoding='utf-8') as f:
 56 |             summaries = json.load(f)
 57 |             return list(summaries.values())
 58 |     except Exception as e:
 59 |         logging.error(f"加载摘要文件时出错: {str(e)}")
 60 |         return []
 61 | 
 62 | def main():
 63 |     parser = argparse.ArgumentParser(description="小说营销内容生成工具")
 64 |     parser.add_argument("--config", default="config.json", help="配置文件路径")
 65 |     parser.add_argument("--output_dir", default="data/marketing", help="输出目录")
 66 |     parser.add_argument("--summary_file", help="章节摘要文件路径")
 67 |     parser.add_argument("--keywords", nargs="+", help="额外的关键词")
 68 |     parser.add_argument("--characters", nargs="+", help="主要角色名")
 69 |     args = parser.parse_args()
 70 |     
 71 |     try:
 72 |         setup_logging()
 73 |         logging.info("开始生成小说营销内容...")
 74 |         
 75 |         # 加载配置
 76 |         config = Config()  # 不传递参数
 77 |         logging.info("配置加载完成")
 78 |         
 79 |         # 创建内容生成模型
 80 |         content_model = create_model(config.model_config["content_model"])
 81 |         logging.info("AI模型初始化完成")
 82 |         
 83 |         # 创建标题生成器
 84 |         generator = TitleGenerator(content_model, args.output_dir)
 85 |         
 86 |         # 加载章节摘要
 87 |         chapter_summaries = []
 88 |         if args.summary_file:
 89 |             chapter_summaries = load_chapter_summaries(args.summary_file)
 90 |             logging.info(f"已加载 {len(chapter_summaries)} 条章节摘要")
 91 |         elif hasattr(config, 'output_config') and 'output_dir' in config.output_config:
 92 |             summary_file = os.path.join(config.output_config['output_dir'], "summary.json")
 93 |             if os.path.exists(summary_file):
 94 |                 chapter_summaries = load_chapter_summaries(summary_file)
 95 |                 logging.info(f"已从默认位置加载 {len(chapter_summaries)} 条章节摘要")
 96 |         
 97 |         # 准备小说配置
 98 |         novel_config = {
 99 |             "type": config.novel_config.get("type", "玄幻"),
100 |             "theme": config.novel_config.get("theme", "修真逆袭"),
101 |             "keywords": args.keywords or config.novel_config.get("keywords", []),
102 |             "main_characters": args.characters or config.novel_config.get("main_characters", [])
103 |         }
104 |         
105 |         # 一键生成所有营销内容
106 |         result = generator.one_click_generate(novel_config, chapter_summaries)
107 |         
108 |         logging.info("营销内容生成完成！")
109 |         logging.info(f"结果已保存到：{result['saved_file']}")
110 |         
111 |         # 打印生成的内容摘要
112 |         print("\n===== 生成的营销内容摘要 =====")
113 |         print("\n【标题方案】")
114 |         for platform, title in result["titles"].items():
115 |             print(f"{platform}: {title}")
116 |             
117 |         print("\n【故事梗概】")
118 |         print(result["summary"])
119 |         
120 |         print("\n【封面提示词】")
121 |         for platform, prompt in result["cover_prompts"].items():
122 |             print(f"{platform}: {prompt}")
123 |         
124 |         if "cover_images" in result and result["cover_images"]:
125 |             print("\n【封面图片】")
126 |             for platform, image_path in result["cover_images"].items():
127 |                 print(f"{platform}: {image_path}")
128 |         
129 |         print("\n【已保存到】")
130 |         print(result["saved_file"])
131 |         
132 |     except Exception as e:
133 |         logging.error(f"生成营销内容时出错: {str(e)}")
134 |         raise
135 | 
# Run the marketing generator only when this file is executed as a script.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------