Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions _public/static/function/css/video.css
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,8 @@ body {
.ref-name {
font-size: 11px;
color: var(--accents-4);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
white-space: pre-wrap;
overflow-wrap: anywhere;
max-width: 100%;
display: inline-block;
}
Expand Down Expand Up @@ -253,6 +252,18 @@ body {
resize: vertical;
}

/* Small muted helper text shown with the prompt field. */
.prompt-tip {
  color: var(--accents-4);
  font-size: 11px;
  line-height: 1.5;
  margin-top: 8px;
}

/* Reference-image textarea: vertical resizing only, with a minimum height. */
.ref-textarea {
  resize: vertical;
  min-height: 88px;
}

.status-header {
display: flex;
align-items: center;
Expand Down
100 changes: 71 additions & 29 deletions _public/static/function/js/video.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@
let contentBuffer = '';
let collectingContent = false;
let startAt = 0;
let fileDataUrl = '';
let fileDataUrls = [];
let elapsedTimer = null;
let lastProgress = 0;
let currentPreviewItem = null;
let previewCount = 0;
const DEFAULT_REASONING_EFFORT = 'low';
const MAX_REFERENCE_IMAGES = 7;

function toast(message, type) {
if (typeof showToast === 'function') {
Expand Down Expand Up @@ -229,15 +230,45 @@
}

function clearFileSelection() {
fileDataUrl = '';
fileDataUrls = [];
if (imageFileInput) {
imageFileInput.value = '';
}
if (imageFileName) {
imageFileName.textContent = t('common.noFileSelected');
imageFileName.textContent = t('video.noReferenceSelected');
}
}

/**
 * Render the list of selected reference images into the summary label.
 *
 * Does nothing when the `imageFileName` element is unavailable. An empty or
 * missing list shows the localized "no reference selected" text; otherwise
 * the entries are shown one per line.
 *
 * @param {string[]} [names] Display labels, one per reference image.
 */
function updateReferenceSummary(names) {
  if (!imageFileName) {
    return;
  }
  const hasNames = Boolean(names && names.length);
  imageFileName.textContent = hasNames
    ? names.join('\n')
    : t('video.noReferenceSelected');
}

/**
 * Split a multi-line text value into a list of reference entries.
 *
 * Lines are separated by `\n` or `\r\n`. Each line is trimmed and blank
 * lines are dropped. A falsy value yields an empty list.
 *
 * @param {string} [value] Raw text, one reference per line.
 * @returns {string[]} Trimmed, non-empty lines in their original order.
 */
function parseReferenceUrls(value) {
  const entries = [];
  const lines = (value || '').split(/\r?\n/);
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed) {
      entries.push(trimmed);
    }
  }
  return entries;
}

/**
 * Collect the reference images to send with a video request.
 *
 * Images come from exactly one source: the uploaded files held in
 * `fileDataUrls`, or the line-separated entries typed into `imageUrlInput`.
 * Each failure case shows an error toast before throwing.
 *
 * @returns {string[]} A copy of the uploaded data URLs, or the parsed entries.
 * @throws {Error} `invalid_reference` when both sources are populated.
 * @throws {Error} `too_many_references` when more than
 *   `MAX_REFERENCE_IMAGES` entries are selected.
 */
function getReferenceImages() {
  let typedUrls = [];
  if (imageUrlInput) {
    typedUrls = parseReferenceUrls(imageUrlInput.value);
  }

  const hasUploads = fileDataUrls.length > 0;
  if (hasUploads && typedUrls.length > 0) {
    toast(t('video.referenceConflict'), 'error');
    throw new Error('invalid_reference');
  }

  // Copy the uploads so callers cannot mutate the shared selection state.
  const selected = hasUploads ? Array.from(fileDataUrls) : typedUrls;
  if (selected.length > MAX_REFERENCE_IMAGES) {
    toast(t('video.referenceLimit'), 'error');
    throw new Error('too_many_references');
  }
  return selected;
}

function normalizeAuthHeader(authHeader) {
if (!authHeader) return '';
if (authHeader.startsWith('Bearer ')) {
Expand All @@ -260,12 +291,7 @@

async function createVideoTask(authHeader) {
const prompt = promptInput ? promptInput.value.trim() : '';
const rawUrl = imageUrlInput ? imageUrlInput.value.trim() : '';
if (fileDataUrl && rawUrl) {
toast(t('video.referenceConflict'), 'error');
throw new Error('invalid_reference');
}
const imageUrl = fileDataUrl || rawUrl;
const imageUrls = getReferenceImages();
const res = await fetch('/v1/function/video/start', {
method: 'POST',
headers: {
Expand All @@ -274,7 +300,7 @@
},
body: JSON.stringify({
prompt,
image_url: imageUrl || null,
image_urls: imageUrls,
reasoning_effort: DEFAULT_REASONING_EFFORT,
aspect_ratio: ratioSelect ? ratioSelect.value : '3:2',
video_length: lengthSelect ? parseInt(lengthSelect.value, 10) : 6,
Expand Down Expand Up @@ -604,31 +630,38 @@

if (imageFileInput) {
imageFileInput.addEventListener('change', () => {
const file = imageFileInput.files && imageFileInput.files[0];
if (!file) {
const files = imageFileInput.files ? Array.from(imageFileInput.files) : [];
if (!files.length) {
clearFileSelection();
return;
}
if (files.length > MAX_REFERENCE_IMAGES) {
clearFileSelection();
toast(t('video.referenceLimit'), 'error');
return;
}
if (imageUrlInput && imageUrlInput.value.trim()) {
imageUrlInput.value = '';
}
if (imageFileName) {
imageFileName.textContent = file.name;
}
const reader = new FileReader();
reader.onload = () => {
if (typeof reader.result === 'string') {
fileDataUrl = reader.result;
} else {
fileDataUrl = '';
toast(t('common.fileReadFailed'), 'error');
}
};
reader.onerror = () => {
fileDataUrl = '';
Promise.all(files.map(file => new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onload = () => {
if (typeof reader.result === 'string') {
resolve({ name: file.name, data: reader.result });
} else {
reject(new Error('read_failed'));
}
};
reader.onerror = () => reject(new Error('read_failed'));
reader.readAsDataURL(file);
}))).then(items => {
fileDataUrls = items.map(item => item.data);
updateReferenceSummary(items.map((item, index) => `${index + 1}. ${item.name}`));
}).catch(() => {
fileDataUrls = [];
toast(t('common.fileReadFailed'), 'error');
};
reader.readAsDataURL(file);
updateReferenceSummary([]);
});
});
}

Expand All @@ -646,9 +679,18 @@

if (imageUrlInput) {
imageUrlInput.addEventListener('input', () => {
if (imageUrlInput.value.trim() && fileDataUrl) {
const urls = parseReferenceUrls(imageUrlInput.value);
if (urls.length > MAX_REFERENCE_IMAGES) {
toast(t('video.referenceLimit'), 'error');
}
if (imageUrlInput.value.trim() && fileDataUrls.length) {
clearFileSelection();
}
if (urls.length) {
updateReferenceSummary(urls.map((url, index) => `${index + 1}. ${url}`));
} else if (!fileDataUrls.length) {
updateReferenceSummary([]);
}
});
}

Expand Down
9 changes: 5 additions & 4 deletions _public/static/function/pages/video.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,16 @@ <h2 class="text-2xl font-semibold tracking-tight" data-i18n="video.title">Video
<div class="settings-grid">
<div class="settings-block prompt-block">
<label class="field-label" for="promptInput" data-i18n="video.prompt">提示词</label>
<textarea id="promptInput" class="geist-input video-textarea" placeholder="例如:街头霓虹雨夜,慢镜头,胶片质感" data-i18n-placeholder="video.promptPlaceholder"></textarea>
<textarea id="promptInput" class="geist-input video-textarea" placeholder="例如:@图1街头霓虹雨夜,@图2人物回头微笑,慢镜头,胶片质感" data-i18n-placeholder="video.promptPlaceholder"></textarea>
<div class="prompt-tip" data-i18n="video.promptTip">多图参考可在提示词中使用 @图1 到 @图7,按参考图顺序对应。</div>
</div>
<div class="settings-block ref-block">
<label class="field-label" for="imageUrlInput" data-i18n="video.referenceImage">参考图</label>
<div class="ref-controls">
<input id="imageUrlInput" class="geist-input" placeholder="https://... 或 data:image/..." data-i18n-placeholder="video.referenceImagePlaceholder">
<textarea id="imageUrlInput" class="geist-input ref-textarea" placeholder="每行一个 https://... 或 data:image/...,最多 7 张" data-i18n-placeholder="video.referenceImagePlaceholder"></textarea>
</div>
<div class="ref-meta">
<span id="imageFileName" class="ref-name" data-i18n="common.noFileSelected">未选择文件</span>
<span id="imageFileName" class="ref-name" data-i18n="video.noReferenceSelected">未选择参考图</span>
</div>
</div>
<div class="settings-block ratio-block">
Expand Down Expand Up @@ -95,7 +96,7 @@ <h2 class="text-2xl font-semibold tracking-tight" data-i18n="video.title">Video
<div class="settings-block upload-block">
<label class="field-label">&nbsp;</label>
<button id="selectImageFileBtn" class="geist-button-outline text-xs px-3" type="button" data-i18n="video.upload">上传</button>
<input id="imageFileInput" class="ref-file-input" type="file" accept="image/*">
<input id="imageFileInput" class="ref-file-input" type="file" accept="image/*" multiple>
</div>
<div class="settings-block clear-block">
<label class="field-label">&nbsp;</label>
Expand Down
11 changes: 7 additions & 4 deletions _public/static/i18n/locales/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -503,13 +503,14 @@
"video": {
"pageTitle": "Grok2API - Video Generation",
"title": "Video Generation",
"subtitle": "Generate short videos with reference images and preset styles.",
"subtitle": "Generate short videos with up to 7 reference images, @图N placeholders, and preset styles.",
"startGenerate": "Generate",
"genSettings": "Generation Settings",
"prompt": "Prompt",
"promptPlaceholder": "e.g.: neon rain at night on the street, slow motion, film grain",
"promptPlaceholder": "e.g.: @图1 neon rainy street at night, @图2 subject looking back and smiling, slow motion, film grain",
"promptTip": "Use @图1 to @图7 in the prompt to reference images by upload order.",
"referenceImage": "Reference Image",
"referenceImagePlaceholder": "https://... or data:image/...",
"referenceImagePlaceholder": "One https://... or data:image/... per line, up to 7 images",
"aspectRatio": "Aspect Ratio",
"ratio3_2": "3:2 Landscape",
"ratio2_3": "2:3 Portrait",
Expand Down Expand Up @@ -538,7 +539,9 @@
"superResolution": "Super Resolution",
"superResolutionInProgress": "Super resolution in progress",
"alreadyGenerating": "Already generating",
"referenceConflict": "Reference image: choose either URL/Base64 or file upload",
"referenceConflict": "Reference images: choose either URL/Base64 list or file upload",
"referenceLimit": "A maximum of 7 reference images is supported",
"noReferenceSelected": "No reference images selected",
"downloadFailed": "Download failed, please check if the video link is accessible",
"sec6": "6s",
"sec10": "10s",
Expand Down
11 changes: 7 additions & 4 deletions _public/static/i18n/locales/zh.json
Original file line number Diff line number Diff line change
Expand Up @@ -503,13 +503,14 @@
"video": {
"pageTitle": "Grok2API - Video 视频生成",
"title": "Video 视频生成",
"subtitle": "生成短视频,支持参考图与多种预设风格。",
"subtitle": "生成短视频,支持最多 7 张参考图、@图N 引用与多种预设风格。",
"startGenerate": "开始生成",
"genSettings": "生成设置",
"prompt": "提示词",
"promptPlaceholder": "例如:街头霓虹雨夜,慢镜头,胶片质感",
"promptPlaceholder": "例如:@图1街头霓虹雨夜,@图2人物回头微笑,慢镜头,胶片质感",
"promptTip": "多图参考可在提示词中使用 @图1 到 @图7,按参考图顺序对应。",
"referenceImage": "参考图",
"referenceImagePlaceholder": "https://... 或 data:image/...",
"referenceImagePlaceholder": "每行一个 https://... 或 data:image/...,最多 7 张",
"aspectRatio": "画面比例",
"ratio3_2": "3:2 横构图",
"ratio2_3": "2:3 竖构图",
Expand Down Expand Up @@ -538,7 +539,9 @@
"superResolution": "超分辨率",
"superResolutionInProgress": "超分辨率中",
"alreadyGenerating": "已在生成中",
"referenceConflict": "参考图只能选择其一:URL/Base64 或 本地上传",
"referenceConflict": "参考图只能选择其一:URL/Base64 列表 或 本地上传",
"referenceLimit": "参考图最多支持 7 张",
"noReferenceSelected": "未选择参考图",
"downloadFailed": "下载失败,请检查视频链接是否可访问",
"sec6": "6 秒",
"sec10": "10 秒",
Expand Down
44 changes: 32 additions & 12 deletions app/api/v1/function/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ async def _new_session(
video_length: int,
resolution_name: str,
preset: str,
image_url: Optional[str],
image_urls: Optional[List[str]],
reasoning_effort: Optional[str],
) -> str:
task_id = uuid.uuid4().hex
Expand All @@ -62,7 +62,7 @@ async def _new_session(
"video_length": video_length,
"resolution_name": resolution_name,
"preset": preset,
"image_url": image_url,
"image_urls": image_urls or [],
"reasoning_effort": reasoning_effort,
"created_at": now,
}
Expand Down Expand Up @@ -123,13 +123,23 @@ def _validate_image_url(image_url: str) -> None:
)


def _normalize_image_urls(values: Optional[List[str]]) -> List[str]:
normalized: List[str] = []
if isinstance(values, list):
for item in values:
value = (item or "").strip()
if value:
normalized.append(value)
return normalized


class VideoStartRequest(BaseModel):
prompt: str
aspect_ratio: Optional[str] = "3:2"
video_length: Optional[int] = 6
resolution_name: Optional[str] = "480p"
preset: Optional[str] = "normal"
image_url: Optional[str] = None
image_urls: Optional[List[str]] = None
reasoning_effort: Optional[str] = None


Expand Down Expand Up @@ -166,8 +176,12 @@ async def function_video_start(data: VideoStartRequest):
detail="preset must be one of ['fun','normal','spicy','custom']",
)

image_url = (data.image_url or "").strip() or None
if image_url:
image_urls = _normalize_image_urls(data.image_urls)
if len(image_urls) > 7:
raise HTTPException(
status_code=400, detail="image_urls supports at most 7 references"
)
for image_url in image_urls:
_validate_image_url(image_url)

reasoning_effort = (data.reasoning_effort or "").strip() or None
Expand All @@ -185,7 +199,7 @@ async def function_video_start(data: VideoStartRequest):
video_length,
resolution_name,
preset,
image_url,
image_urls,
reasoning_effort,
)
return {"task_id": task_id, "aspect_ratio": aspect_ratio}
Expand All @@ -202,7 +216,11 @@ async def function_video_sse(request: Request, task_id: str = Query("")):
video_length = int(session.get("video_length") or 6)
resolution_name = str(session.get("resolution_name") or "480p")
preset = str(session.get("preset") or "normal")
image_url = session.get("image_url")
image_urls = [
str(item).strip()
for item in (session.get("image_urls") or [])
if str(item).strip()
]
reasoning_effort = session.get("reasoning_effort")

async def event_stream():
Expand All @@ -218,14 +236,16 @@ async def event_stream():
yield "data: [DONE]\n\n"
return

if image_url:
if image_urls:
content: List[Dict[str, Any]] = [{"type": "text", "text": prompt}]
for image_url in image_urls:
content.append(
{"type": "image_url", "image_url": {"url": image_url}}
)
messages: List[Dict[str, Any]] = [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": image_url}},
],
"content": content,
}
]
else:
Expand Down
Loading
Loading