Skip to content

Commit f6be7ec

Browse files
committed
docs: Enhance configuration management in README and update config handling in code to support saved configurations with higher priority
1 parent 47e890b commit f6be7ec

File tree

5 files changed

+199
-63
lines changed

5 files changed

+199
-63
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,14 @@ docker compose up -d --build
165165

166166
### 📋 配置说明
167167

168-
使用官方 Docker 镜像时,需要在 docker-compose.dockerhub.yml 文件中配置环境变量传递给容器。系统支持优先级为:**Docker 环境变量 > .env 文件 > JSON 配置文件**
168+
使用官方 Docker 镜像时,需要在 docker-compose.dockerhub.yml 文件中配置环境变量传递给容器。系统支持灵活的配置优先级:
169+
170+
**配置优先级**:`.saved.json 文件` > `Docker 环境变量` > `.env 文件` > `JSON 配置文件` > `默认值`
171+
172+
- **`.saved.json 文件`**:通过前端界面修改配置时动态生成,优先级最高,确保用户修改的配置能够生效
173+
- **Docker 环境变量**:适合生产环境部署,通过 docker-compose 文件设置
174+
- **`.env 文件`**:适合开发环境,存储敏感信息如 API 密钥
175+
- **JSON 配置文件**:基础配置模板,支持复杂的嵌套配置结构
169176

170177
**完整的环境变量列表请查看:**
171178

README_ENGLISH.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,22 @@ If you want to understand the details of each module or perform secondary develo
137137
- MySQL
138138
- Qdrant
139139

140+
### 📋 Configuration Instructions
141+
142+
When using official Docker images, you need to configure environment variables in the docker-compose.dockerhub.yml file to pass them to containers. The system supports flexible configuration priority:
143+
144+
**Configuration Priority**: `.saved.json files` > `Docker environment variables` > `.env files` > `JSON config files` > `default values`
145+
146+
- **`.saved.json files`**: Dynamically generated when modifying configuration through the frontend interface, highest priority, ensuring user-modified configurations take effect
147+
- **Docker environment variables**: Suitable for production deployment, set through docker-compose files
148+
- **`.env files`**: Suitable for development environments, storing sensitive information like API keys
149+
- **JSON config files**: Base configuration templates, supporting complex nested configuration structures
150+
151+
**For complete environment variable lists, see:**
152+
153+
- RAG Service: [`sources/gc-qa-rag-server/env.example`](./sources/gc-qa-rag-server/env.example)
154+
- ETL Service: [`sources/gc-qa-rag-etl/env.example`](./sources/gc-qa-rag-etl/env.example)
155+
140156
**Important**: Regardless of which deployment method you choose, you need to configure API keys first!
141157

142158
For detailed steps, please refer to our [Quick Start Guide](./quickstart_ENGLISH.md).

sources/gc-qa-rag-etl/etlapp/common/config.py

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,29 @@ class VectorDbConfig:
3232
host: str
3333

3434

35-
def _get_config_value(key: str, config_raw: dict, default: Optional[str] = None) -> str:
36-
"""Get configuration value with priority: ENV > .env > JSON."""
37-
# First check environment variables
38-
env_value = os.getenv(key)
39-
if env_value is not None:
40-
return env_value
41-
42-
# Then check nested JSON structure
35+
def _get_config_value(key: str, config_raw: dict, saved_config_raw: dict, default: Optional[str] = None) -> str:
36+
"""Get configuration value with priority: saved.json > ENV > .env > JSON."""
37+
# First check saved configuration (highest priority)
4338
keys = key.lower().split('_')
4439
# Skip the 'gc_qa_rag' prefix for JSON lookup
4540
if len(keys) >= 4 and keys[0] == 'gc' and keys[1] == 'qa' and keys[2] == 'rag':
4641
keys = keys[3:]
4742

43+
# Check saved config first
44+
current = saved_config_raw
45+
try:
46+
for k in keys:
47+
current = current[k]
48+
return str(current)
49+
except (KeyError, TypeError):
50+
pass
51+
52+
# Then check environment variables
53+
env_value = os.getenv(key)
54+
if env_value is not None:
55+
return env_value
56+
57+
# Then check nested JSON structure
4858
current = config_raw
4959
try:
5060
for k in keys:
@@ -60,9 +70,9 @@ def _get_config_value(key: str, config_raw: dict, default: Optional[str] = None)
6070
raise ValueError(f"Configuration value not found for key: {key}")
6171

6272

63-
def _get_config_int(key: str, config_raw: dict, default: Optional[int] = None) -> int:
64-
"""Get integer configuration value with priority: ENV > .env > JSON."""
65-
value = _get_config_value(key, config_raw, str(default) if default is not None else None)
73+
def _get_config_int(key: str, config_raw: dict, saved_config_raw: dict, default: Optional[int] = None) -> int:
74+
"""Get integer configuration value with priority: saved.json > ENV > .env > JSON."""
75+
value = _get_config_value(key, config_raw, saved_config_raw, str(default) if default is not None else None)
6676
try:
6777
return int(value)
6878
except (ValueError, TypeError):
@@ -97,27 +107,38 @@ def from_environment(cls, environment: str) -> "Config":
97107
except json.JSONDecodeError as e:
98108
print(f"Warning: Invalid JSON in configuration file: {e}")
99109

110+
# Try to load saved config (highest priority)
111+
saved_config_raw = {}
112+
saved_config_path = Path(f".config.{environment}.saved.json")
113+
if saved_config_path.exists():
114+
try:
115+
with open(saved_config_path) as f:
116+
saved_config_raw = json.load(f)
117+
print(f"Loaded saved configuration from: {saved_config_path}")
118+
except json.JSONDecodeError as e:
119+
print(f"Warning: Invalid JSON in saved configuration file: {e}")
120+
100121
return cls(
101122
environment=environment,
102123
das=DasConfig(
103-
base_url_page=_get_config_value("GC_QA_RAG_DAS_BASE_URL_PAGE", config_raw, ""),
104-
base_url_thread=_get_config_value("GC_QA_RAG_DAS_BASE_URL_THREAD", config_raw, ""),
105-
token=_get_config_value("GC_QA_RAG_DAS_TOKEN", config_raw, ""),
124+
base_url_page=_get_config_value("GC_QA_RAG_DAS_BASE_URL_PAGE", config_raw, saved_config_raw, ""),
125+
base_url_thread=_get_config_value("GC_QA_RAG_DAS_BASE_URL_THREAD", config_raw, saved_config_raw, ""),
126+
token=_get_config_value("GC_QA_RAG_DAS_TOKEN", config_raw, saved_config_raw, ""),
106127
),
107128
llm=LlmConfig(
108-
api_key=_get_config_value("GC_QA_RAG_LLM_API_KEY", config_raw),
109-
api_base=_get_config_value("GC_QA_RAG_LLM_API_BASE", config_raw, "https://dashscope.aliyuncs.com/compatible-mode/v1"),
110-
model_name=_get_config_value("GC_QA_RAG_LLM_MODEL_NAME", config_raw, "qwen-plus"),
111-
max_rpm=_get_config_int("GC_QA_RAG_LLM_MAX_RPM", config_raw, 100),
129+
api_key=_get_config_value("GC_QA_RAG_LLM_API_KEY", config_raw, saved_config_raw),
130+
api_base=_get_config_value("GC_QA_RAG_LLM_API_BASE", config_raw, saved_config_raw, "https://dashscope.aliyuncs.com/compatible-mode/v1"),
131+
model_name=_get_config_value("GC_QA_RAG_LLM_MODEL_NAME", config_raw, saved_config_raw, "qwen-plus"),
132+
max_rpm=_get_config_int("GC_QA_RAG_LLM_MAX_RPM", config_raw, saved_config_raw, 100),
112133
),
113134
embedding=EmbeddingConfig(
114-
api_key=_get_config_value("GC_QA_RAG_EMBEDDING_API_KEY", config_raw)
135+
api_key=_get_config_value("GC_QA_RAG_EMBEDDING_API_KEY", config_raw, saved_config_raw)
115136
),
116137
vector_db=VectorDbConfig(
117-
host=_get_config_value("GC_QA_RAG_VECTOR_DB_HOST", config_raw, "http://host.docker.internal:6333")
138+
host=_get_config_value("GC_QA_RAG_VECTOR_DB_HOST", config_raw, saved_config_raw, "http://host.docker.internal:6333")
118139
),
119-
root_path=_get_config_value("GC_QA_RAG_ROOT_PATH", config_raw, user_cache_dir("gc-qa-rag", ensure_exists=True)),
120-
log_path=_get_config_value("GC_QA_RAG_LOG_PATH", config_raw, user_log_dir("gc-qa-rag", ensure_exists=True)),
140+
root_path=_get_config_value("GC_QA_RAG_ROOT_PATH", config_raw, saved_config_raw, user_cache_dir("gc-qa-rag", ensure_exists=True)),
141+
log_path=_get_config_value("GC_QA_RAG_LOG_PATH", config_raw, saved_config_raw, user_log_dir("gc-qa-rag", ensure_exists=True)),
121142
)
122143

123144

sources/gc-qa-rag-etl/etlapp_api/routers/config.py

Lines changed: 73 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,77 @@ def dataclass_to_dict(obj):
2323

2424
@config_router.post("/update_config")
2525
async def update_config_api(request: Request):
26-
data = await request.json()
27-
from etlapp.common.config import app_config
26+
try:
27+
data = await request.json()
28+
from etlapp.common.config import app_config
29+
30+
# 创建保存的配置文件路径
31+
saved_config_path = Path(f".config.{app_config.environment}.saved.json")
32+
33+
# 如果保存的配置文件已存在,先加载现有内容
34+
saved_config_raw = {}
35+
if saved_config_path.exists():
36+
try:
37+
with open(saved_config_path, "r", encoding="utf-8") as f:
38+
saved_config_raw = json.load(f)
39+
except json.JSONDecodeError:
40+
# 如果文件损坏,从空配置开始
41+
saved_config_raw = {}
42+
43+
# 更新配置数据
44+
for key in ["llm", "embedding", "vector_db", "root_path", "log_path", "das"]:
45+
if key in data:
46+
saved_config_raw[key] = data[key]
47+
48+
# 写入保存的配置文件
49+
with open(saved_config_path, "w", encoding="utf-8") as f:
50+
json.dump(saved_config_raw, f, ensure_ascii=False, indent=4)
51+
52+
# 重新加载配置
53+
from etlapp.common.config import reload_config
54+
reload_config()
2855

29-
config_path = Path(f".config.{app_config.environment}.json")
30-
if not config_path.exists():
31-
return JSONResponse(status_code=404, content={"error": "Config file not found"})
32-
with open(config_path, "r", encoding="utf-8") as f:
33-
config_raw = json.load(f)
34-
for key in ["llm", "embedding", "vector_db", "root_path", "log_path"]:
35-
if key in data:
36-
config_raw[key] = data[key]
37-
with open(config_path, "w", encoding="utf-8") as f:
38-
json.dump(config_raw, f, ensure_ascii=False, indent=4)
39-
# reload configuration
40-
from etlapp.common.config import reload_config
41-
42-
reload_config()
43-
return {"msg": "Config updated"}
56+
return {
57+
"msg": "Config updated successfully",
58+
"saved_to": str(saved_config_path),
59+
"note": "Configuration has been saved to .saved.json file with highest priority"
60+
}
61+
62+
except Exception as e:
63+
return JSONResponse(
64+
status_code=500,
65+
content={"error": f"Failed to update configuration: {str(e)}"}
66+
)
67+
68+
69+
@config_router.delete("/reset_config")
70+
async def reset_config_api():
71+
"""删除保存的配置文件,恢复到默认配置优先级"""
72+
try:
73+
from etlapp.common.config import app_config
74+
75+
saved_config_path = Path(f".config.{app_config.environment}.saved.json")
76+
77+
if saved_config_path.exists():
78+
saved_config_path.unlink() # 删除文件
79+
80+
# 重新加载配置
81+
from etlapp.common.config import reload_config
82+
reload_config()
83+
84+
return {
85+
"msg": "Saved configuration reset successfully",
86+
"deleted_file": str(saved_config_path),
87+
"note": "Configuration priority has been restored to: ENV > .env > JSON"
88+
}
89+
else:
90+
return {
91+
"msg": "No saved configuration file found",
92+
"note": "Already using default configuration priority"
93+
}
94+
95+
except Exception as e:
96+
return JSONResponse(
97+
status_code=500,
98+
content={"error": f"Failed to reset configuration: {str(e)}"}
99+
)

sources/gc-qa-rag-server/ragapp/common/config.py

Lines changed: 59 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,29 @@ class DbConfig:
2929
connection_string: str
3030

3131

32-
def _get_config_value(key: str, config_raw: dict, default: Optional[str] = None) -> str:
33-
"""Get configuration value with priority: ENV > .env > JSON."""
34-
# First check environment variables
35-
env_value = os.getenv(key)
36-
if env_value is not None:
37-
return env_value
38-
39-
# Then check nested JSON structure
32+
def _get_config_value(key: str, config_raw: dict, saved_config_raw: dict, default: Optional[str] = None) -> str:
33+
"""Get configuration value with priority: saved.json > ENV > .env > JSON."""
34+
# First check saved configuration (highest priority)
4035
keys = key.lower().split('_')
4136
# Skip the 'gc_qa_rag' prefix for JSON lookup
4237
if keys[0] == 'gc' and keys[1] == 'qa' and keys[2] == 'rag':
4338
keys = keys[3:]
4439

40+
# Check saved config first
41+
current = saved_config_raw
42+
try:
43+
for k in keys:
44+
current = current[k]
45+
return str(current)
46+
except (KeyError, TypeError):
47+
pass
48+
49+
# Then check environment variables
50+
env_value = os.getenv(key)
51+
if env_value is not None:
52+
return env_value
53+
54+
# Then check nested JSON structure
4555
current = config_raw
4656
try:
4757
for k in keys:
@@ -58,10 +68,25 @@ def _get_config_value(key: str, config_raw: dict, default: Optional[str] = None)
5868

5969

6070
def _get_llm_config(
61-
config_raw: dict, config_type: str, default_config: Optional[LlmConfig] = None
71+
config_raw: dict, saved_config_raw: dict, config_type: str, default_config: Optional[LlmConfig] = None
6272
) -> LlmConfig:
6373
"""Get LLM configuration with fallback to default config if specified config doesn't exist."""
64-
# Try to get from environment variables first
74+
# Try to get from saved config first (highest priority)
75+
if config_type in saved_config_raw:
76+
saved_config = saved_config_raw[config_type]
77+
api_key = saved_config.get("api_key")
78+
api_base = saved_config.get("api_base")
79+
model_name = saved_config.get("model_name")
80+
81+
# If all saved config values are set, use them
82+
if api_key and api_base and model_name:
83+
return LlmConfig(
84+
api_key=api_key,
85+
api_base=api_base,
86+
model_name=model_name,
87+
)
88+
89+
# Try to get from environment variables
6590
env_prefix = f"GC_QA_RAG_{config_type.upper()}"
6691
api_key = os.getenv(f"{env_prefix}_API_KEY")
6792
api_base = os.getenv(f"{env_prefix}_API_BASE")
@@ -94,9 +119,9 @@ def _get_llm_config(
94119

95120
# Last resort: use default LLM config from env/JSON
96121
return LlmConfig(
97-
api_key=api_key or _get_config_value("GC_QA_RAG_LLM_DEFAULT_API_KEY", config_raw),
98-
api_base=api_base or _get_config_value("GC_QA_RAG_LLM_DEFAULT_API_BASE", config_raw, "https://dashscope.aliyuncs.com/compatible-mode/v1"),
99-
model_name=model_name or _get_config_value("GC_QA_RAG_LLM_DEFAULT_MODEL_NAME", config_raw, "qwen-plus"),
122+
api_key=api_key or _get_config_value("GC_QA_RAG_LLM_DEFAULT_API_KEY", config_raw, saved_config_raw),
123+
api_base=api_base or _get_config_value("GC_QA_RAG_LLM_DEFAULT_API_BASE", config_raw, saved_config_raw, "https://dashscope.aliyuncs.com/compatible-mode/v1"),
124+
model_name=model_name or _get_config_value("GC_QA_RAG_LLM_DEFAULT_MODEL_NAME", config_raw, saved_config_raw, "qwen-plus"),
100125
)
101126

102127

@@ -130,27 +155,38 @@ def from_environment(cls, environment: str) -> "Config":
130155
except json.JSONDecodeError as e:
131156
print(f"Warning: Invalid JSON in configuration file: {e}")
132157

158+
# Try to load saved config (highest priority)
159+
saved_config_raw = {}
160+
saved_config_path = Path(f".config.{environment}.saved.json")
161+
if saved_config_path.exists():
162+
try:
163+
with open(saved_config_path) as f:
164+
saved_config_raw = json.load(f)
165+
print(f"Loaded saved configuration from: {saved_config_path}")
166+
except json.JSONDecodeError as e:
167+
print(f"Warning: Invalid JSON in saved configuration file: {e}")
168+
133169
# Initialize default config first
134-
llm_default = _get_llm_config(config_raw, "llm_default")
170+
llm_default = _get_llm_config(config_raw, saved_config_raw, "llm_default")
135171

136172
return cls(
137173
environment=environment,
138174
llm_default=llm_default,
139-
llm_summary=_get_llm_config(config_raw, "llm_summary", llm_default),
140-
llm_think=_get_llm_config(config_raw, "llm_think", llm_default),
141-
llm_query=_get_llm_config(config_raw, "llm_query", llm_default),
142-
llm_research=_get_llm_config(config_raw, "llm_research", llm_default),
175+
llm_summary=_get_llm_config(config_raw, saved_config_raw, "llm_summary", llm_default),
176+
llm_think=_get_llm_config(config_raw, saved_config_raw, "llm_think", llm_default),
177+
llm_query=_get_llm_config(config_raw, saved_config_raw, "llm_query", llm_default),
178+
llm_research=_get_llm_config(config_raw, saved_config_raw, "llm_research", llm_default),
143179
embedding=EmbeddingConfig(
144-
api_key=_get_config_value("GC_QA_RAG_EMBEDDING_API_KEY", config_raw)
180+
api_key=_get_config_value("GC_QA_RAG_EMBEDDING_API_KEY", config_raw, saved_config_raw)
145181
),
146182
vector_db=VectorDbConfig(
147-
host=_get_config_value("GC_QA_RAG_VECTOR_DB_HOST", config_raw, "http://rag_qdrant_container:6333")
183+
host=_get_config_value("GC_QA_RAG_VECTOR_DB_HOST", config_raw, saved_config_raw, "http://rag_qdrant_container:6333")
148184
),
149185
db=DbConfig(
150-
connection_string=_get_config_value("GC_QA_RAG_DB_CONNECTION_STRING", config_raw, "mysql+mysqlconnector://root:12345678@rag_mysql_container:3306/search_db")
186+
connection_string=_get_config_value("GC_QA_RAG_DB_CONNECTION_STRING", config_raw, saved_config_raw, "mysql+mysqlconnector://root:12345678@rag_mysql_container:3306/search_db")
151187
),
152-
log_path=_get_config_value("GC_QA_RAG_LOG_PATH", config_raw, user_log_dir("gc-qa-rag-server", ensure_exists=True)),
153-
etl_base_url=_get_config_value("GC_QA_RAG_ETL_BASE_URL", config_raw, "http://host.docker.internal:8001"),
188+
log_path=_get_config_value("GC_QA_RAG_LOG_PATH", config_raw, saved_config_raw, user_log_dir("gc-qa-rag-server", ensure_exists=True)),
189+
etl_base_url=_get_config_value("GC_QA_RAG_ETL_BASE_URL", config_raw, saved_config_raw, "http://host.docker.internal:8001"),
154190
)
155191

156192

0 commit comments

Comments
 (0)