Skip to content

Commit 74be281

Browse files
authored
Merge pull request #295 from InvolutionHell/feat/assistant-ratelimit-guard
feat(assistant): AI 接口三层防刷护甲 — 前端约束 + Upstash 限流 + 上游错误兜底
2 parents e6b2aa4 + 8b734fe commit 74be281

8 files changed

Lines changed: 406 additions & 2 deletions

File tree

.env.sample

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,20 @@ INTERN_KEY=
3030
# 在 https://open.bigmodel.cn/ 注册后获取
3131
ZHIPU_API_KEY=
3232

33+
# Upstash Redis(Serverless Redis over HTTP)—— 给 AI 接口做 per-IP rate limit
34+
# 免费模型 GLM-4.6V-Flash 并发极低(≈5),不限流单用户就能打爆。
35+
#
36+
# 获取方式(任选其一):
37+
# 1. 在 https://console.upstash.com/ 手动建 Redis 库,直接复制 REST URL / Token
38+
# 2. Vercel Project → Integrations → Upstash → 一键绑定(env 会自动注入项目)
39+
#
40+
# 代码会自动识别以下三种命名(按优先级):
41+
# a. UPSTASH_REDIS_REST_URL / _TOKEN (手动建推荐)
42+
# b. UPSTASH_REDIS_REST_KV_REST_API_URL / _TOKEN (Vercel 集成 + 自定义 prefix)
43+
# c. KV_REST_API_URL / _TOKEN (Vercel 集成 + 默认无 prefix)
44+
# 未配置时限流会自动降级为放行 + 一次 warn,不会阻塞接口。
45+
UPSTASH_REDIS_REST_URL=
46+
UPSTASH_REDIS_REST_TOKEN=
3347
# Sentry 错误监控(Developer plan 免费 5K errors / 10K perf units / 月)
3448
# NEXT_PUBLIC_SENTRY_DSN 是浏览器端需要的公开 DSN,暴露在前端 bundle 里属于设计,
3549
# SENTRY_AUTH_TOKEN 仅用于 next build 时上传 source map,私密。

app/api/chat/route.ts

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { streamText, UIMessage, convertToModelMessages } from "ai";
33
import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models";
44
import { buildSystemMessage } from "@/lib/ai/prompt";
55
import { source } from "@/lib/source";
6+
import { limitChat, rateLimitResponse } from "@/lib/rate-limit";
67
import fs from "fs/promises";
78
import path from "path";
89

@@ -29,6 +30,22 @@ interface ChatRequest {
2930
import { resolveUserId } from "@/lib/server-auth";
3031

3132
export async function POST(req: Request) {
33+
// 0. Rate limit:免费模型 GLM-4.6V-Flash 并发极低(≈ 5),
34+
// 单用户开几个 tab 就能打爆。per-IP 滑动窗口限流先挡一层。
35+
// (L2 防护;如果 Upstash env 漏配会自动降级为放行+warn)
36+
//
37+
// 预读 body 判断是否带图(hasImage=true 会触发更严的 5 req/60s 窗口)。
38+
// 为此多克一次请求,后续 proxyReq/req.json() 仍可独立读(Copilot CR #4)。
39+
let hasImage = false;
40+
try {
41+
const body = (await req.clone().json()) as Partial<ChatRequest>;
42+
hasImage = messagesHaveImage(body.messages);
43+
} catch {
44+
// body 不是合法 JSON:按无图处理,继续让下游的 req.json() 去报真正的错
45+
}
46+
const rl = await limitChat(req, hasImage);
47+
if (!rl.success) return rateLimitResponse(rl);
48+
3249
// 1. 克隆请求,因为如果代理失败,后面的代码还需要读取 req.json()
3350
const proxyReq = req.clone();
3451

@@ -234,13 +251,114 @@ export async function POST(req: Request) {
234251
return Response.json({ error: error.message }, { status: 400 });
235252
}
236253

254+
// 识别上游(智谱 GLM)限流/欠费/鉴权错误,给出结构化 code 让前端友好提示。
255+
// 智谱业务码参考:
256+
// 1302 - 接口请求并发超额(与 HTTP 429 对应)
257+
// 1113 - 账户余额不足 / 免费额度耗尽
258+
// 1001/1002/1003 - 鉴权失败
259+
const mapped = mapUpstreamError(error);
260+
if (mapped) {
261+
return Response.json(
262+
{ error: mapped.message, code: mapped.code },
263+
{ status: mapped.status },
264+
);
265+
}
266+
237267
return Response.json(
238268
{ error: "Failed to process chat request" },
239269
{ status: 500 },
240270
);
241271
}
242272
}
243273

274+
/**
 * Reports whether any message in a UIMessage-like array carries an image part.
 *
 * A part counts as an image when:
 *   - `type === "image"` or `type === "image_url"`, or
 *   - `type === "file"` and its `mediaType` is an `image/*` MIME type.
 *
 * The MIME comparison is case-insensitive and ignores surrounding whitespace,
 * because MIME type names are case-insensitive; otherwise `"IMAGE/PNG"` would
 * be classified as "no image".
 *
 * Any unexpected structure (non-array input, non-object message, missing or
 * non-array `parts`, non-object part) is treated as "no image": the function
 * prefers letting a request through over rejecting it by mistake.
 *
 * @param messages - Untrusted, already-parsed request payload field.
 * @returns `true` if at least one part of at least one message is an image.
 */
function messagesHaveImage(messages: unknown): boolean {
  if (!Array.isArray(messages)) return false;

  return messages.some((msg: unknown) => {
    if (typeof msg !== "object" || msg === null) return false;

    const parts = (msg as { parts?: unknown }).parts;
    if (!Array.isArray(parts)) return false;

    return parts.some((part: unknown) => {
      if (typeof part !== "object" || part === null) return false;

      const type = (part as { type?: unknown }).type;
      if (type === "image" || type === "image_url") return true;
      if (type !== "file") return false;

      const mediaType = (part as { mediaType?: unknown }).mediaType;
      return (
        typeof mediaType === "string" &&
        mediaType.trim().toLowerCase().startsWith("image/")
      );
    });
  });
}
297+
298+
interface MappedUpstreamError {
299+
status: number;
300+
code: "rate_limited" | "quota_exhausted" | "upstream_auth" | "upstream_down";
301+
message: string;
302+
}
303+
304+
function mapUpstreamError(err: unknown): MappedUpstreamError | null {
305+
if (!err) return null;
306+
307+
// 仅使用 message / response payload,**不要拼 stack** —— stack 里带行号
308+
// 形如 `:429:` / `:1302:` 会误匹配业务码正则(Copilot CR #5)。
309+
// JSON.stringify 对循环引用会抛错,用 try/catch 兜底(Copilot CR #6)。
310+
let raw: string;
311+
if (err instanceof Error) {
312+
raw = err.message;
313+
} else if (typeof err === "string") {
314+
raw = err;
315+
} else {
316+
try {
317+
raw = JSON.stringify(err);
318+
} catch {
319+
raw = String(err);
320+
}
321+
}
322+
323+
// 业务码正则:全部用 `[^\s]{0,N}?` 代替 `.*`,限死回溯深度避免 ReDoS
324+
// (CodeQL polynomial regex 告警)。关键词语义够短,10~20 字符窗口足够。
325+
const hasStatus429 = /\b429\b|rate[-_ ]?limit|too many requests/i.test(raw);
326+
const has1302 = /\b1302\b|||/.test(raw);
327+
const has1113 =
328+
/\b1113\b||[^\s]{0,10}?|quota[^\s]{0,10}?exhaust/i.test(
329+
raw,
330+
);
331+
const hasAuth =
332+
/\b1001\b|\b1002\b|\b1003\b|\b401\b|unauthorized|invalid[^\s]{0,10}?api[^\s]{0,10}?key/i.test(
333+
raw,
334+
);
335+
336+
if (has1302 || hasStatus429) {
337+
return {
338+
status: 429,
339+
code: "rate_limited",
340+
message: "AI 服务被挤爆了,排队中,请 30 秒后再试。(上游并发限流)",
341+
};
342+
}
343+
if (has1113) {
344+
return {
345+
status: 503,
346+
code: "quota_exhausted",
347+
message:
348+
"免费模型今日额度已用完,请明天再来,或在设置里切到你自己的 OpenAI/Gemini。",
349+
};
350+
}
351+
if (hasAuth) {
352+
return {
353+
status: 502,
354+
code: "upstream_auth",
355+
message:
356+
"AI 服务密钥配置异常,站点管理员已收到通知。请稍后重试或切换到自有 API Key。",
357+
};
358+
}
359+
return null;
360+
}
361+
244362
// 提取纯文本内容,过滤掉 MDX 语法
245363
function extractTextFromMDX(content: string): string {
246364
let text = content

app/api/suggestions/route.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { generateText } from "ai";
22
import { unstable_cache } from "next/cache";
33
import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models";
44
import { createGlmFlashModel } from "@/lib/ai/providers/glm";
5+
import { limitChat, rateLimitResponse } from "@/lib/rate-limit";
56

67
// 允许流式响应最长30秒
78
export const maxDuration = 30;
@@ -20,6 +21,10 @@ interface SuggestionsRequest {
2021
}
2122

2223
export async function POST(req: Request) {
24+
// Rate limit:suggestions 也打 LLM,共用同一 IP 额度池
25+
const rl = await limitChat(req, false);
26+
if (!rl.success) return rateLimitResponse(rl);
27+
2328
try {
2429
const {
2530
messages,

app/components/DocsAssistant.tsx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -465,11 +465,15 @@ function deriveAssistantError(
465465
? message
466466
: `The ${providerLabel} API key looks incorrect. Update it in settings and try again.`;
467467
} else if (statusCode === 429) {
468+
// 优先用服务端返回的中文友好提示(rate_limited / quota_exhausted),
469+
// 只在服务端没给消息时才兜底到默认英文文案
468470
friendlyMessage =
469-
"The provider is rate limiting requests. Please wait and try again.";
471+
message && message.length > 0 ? message : "请求太频繁,请稍等片刻再试。";
470472
} else if (statusCode && statusCode >= 500) {
471473
friendlyMessage =
472-
"The AI provider is currently unavailable. Please try again soon.";
474+
message && message.length > 0
475+
? message
476+
: "AI 服务暂时不可用,请稍后再试。";
473477
}
474478

475479
return {

app/components/assistant-ui/thread.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,8 @@ const Composer: FC<ComposerProps> = ({
395395
autoFocus
396396
aria-label="Message input"
397397
disabled={!hasActiveKey}
398+
// 单条消息硬上限 4000 字符:防 token bomb,保护免费模型额度
399+
maxLength={4000}
398400
/>
399401
<ComposerAction
400402
canSend={hasActiveKey}

0 commit comments

Comments
 (0)