Skip to content

Commit af500e3

Browse files
committed
AI tagging
1 parent 892be4a commit af500e3

File tree

3 files changed

+309
-1
lines changed

3 files changed

+309
-1
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ COPY templates /app/templates
77
COPY static /app/static
88
RUN pip install --no-cache-dir flask waitress
99
EXPOSE 8080
10-
CMD ["waitress-serve","--port=8080","app:app"]
10+
CMD ["waitress-serve","--port=8080","--threads=16","app:app"]

app.py

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ def api_autotag():
599599
return jsonify(ok=False, error="auth"), 401
600600
api_key = get_gemini_key()
601601
if not api_key:
602+
LOG.warning("autotag: aucune clé API configurée (GEMINI_API_KEY vide). Configurez-la dans /maintenance ou via la variable d'env.")
602603
return jsonify(ok=False, error="no_api_key", message="GEMINI_API_KEY non configurée"), 400
603604
try:
604605
import urllib.request
@@ -815,6 +816,308 @@ def api_ai_config_set():
815816
set_ollama_model(data["ollama_model"])
816817
return jsonify(ok=True)
817818

819+
# ============================================================
820+
# --- Batch Auto-Tag IA ---
821+
# ============================================================
822+
# Progress record of the batch auto-tag job. The worker and the HTTP
# endpoints in this section read and write it under AUTOTAG_BATCH_LOCK.
AUTOTAG_BATCH = dict(
    status="idle",  # idle | running | paused | done | error
    total=0,
    done=0,
    skipped=0,
    errors=0,
    current="",  # name of the file currently being processed
    message="",
    started=0,
    finished=0,
    paused_at=0,
)
AUTOTAG_BATCH_LOCK = threading.Lock()

# Pause gate: the event being *set* means "not paused"; the pause endpoint
# clears it and the worker polls it between items.
_AUTOTAG_PAUSE_EV = threading.Event()
_AUTOTAG_PAUSE_EV.set()  # not paused initially

# Set by the stop endpoint to ask the worker to exit.
_AUTOTAG_STOP_EV = threading.Event()

# Handle on the worker thread (None until a batch has been started).
_AUTOTAG_THREAD = None

# Rolling journal of per-video results:
# [ {ts, vid, name, tags, error}, ... ] (max 200)
AUTOTAG_IA_LOG = []
AUTOTAG_IA_LOG_LOCK = threading.Lock()
841+
842+
def _autotag_log(entry: dict):
    """Record one result in the in-memory AI journal.

    The journal is capped at 200 entries: once the cap is exceeded the
    oldest entry is dropped. The whole operation runs under
    AUTOTAG_IA_LOG_LOCK.
    """
    with AUTOTAG_IA_LOG_LOCK:
        AUTOTAG_IA_LOG.append(entry)
        over_capacity = len(AUTOTAG_IA_LOG) > 200
        if over_capacity:
            # Drop the oldest record (front of the list).
            del AUTOTAG_IA_LOG[0]
847+
848+
def _autotag_batch_worker(reset_existing: bool):
    """Background worker that AI-tags the media library one video at a time.

    For each candidate video it extracts up to three JPEG frames with
    ffmpeg, sends them to the configured engine (Ollama or Gemini), filters
    the returned tags through ``canon_tag`` and merges them into the
    ``utags`` entry of the persisted state. Progress is published in
    ``AUTOTAG_BATCH`` and every outcome is appended to the AI journal.

    Args:
        reset_existing: when true, every video is processed; otherwise only
            videos that currently have no user tags. Existing tags are
            merged with, not replaced by, the new ones.

    The worker honours ``_AUTOTAG_PAUSE_EV`` (cleared = paused) and
    ``_AUTOTAG_STOP_EV`` (set = stop) between items. Per-video failures are
    counted and logged without aborting the batch; a missing Gemini key or
    an unexpected error outside the per-video handler ends the batch with
    status ``"error"``.
    """
    global _AUTOTAG_THREAD
    import urllib.request as _urlreq

    try:
        state = read_state()
        utags = state.get("utags", {}) or {}

        # Candidates: every non-folder media entry that carries an id.
        all_vids = [v for v in MEDIA if isinstance(v, dict) and v.get("id") and v.get("kind") != "folder"]
        if reset_existing:
            to_tag = all_vids
        else:
            # Only videos without any existing user tags.
            to_tag = [v for v in all_vids if not utags.get(v["id"])]

        with AUTOTAG_BATCH_LOCK:
            AUTOTAG_BATCH.update({
                "total": len(to_tag),
                "done": 0,
                "skipped": 0,
                "errors": 0,
                "current": "",
                "message": f"Démarrage — {len(to_tag)} vidéo(s) à traiter",
                "started": int(time.time()),
                "finished": 0,
            })

        LOG.info("[autotag-batch] Démarrage : %d vidéos à tagger (reset_existing=%s)", len(to_tag), reset_existing)

        for vid_item in to_tag:
            # Block while paused, but wake up as soon as a stop is requested.
            while not _AUTOTAG_PAUSE_EV.is_set() and not _AUTOTAG_STOP_EV.is_set():
                time.sleep(0.5)

            # Single stop path: whether the stop arrived while running or
            # while paused, the batch ends as "idle" (stopped) and never
            # falls through to the "done" summary below.
            if _AUTOTAG_STOP_EV.is_set():
                with AUTOTAG_BATCH_LOCK:
                    AUTOTAG_BATCH["status"] = "idle"
                    AUTOTAG_BATCH["message"] = "Arrêté manuellement"
                    AUTOTAG_BATCH["finished"] = int(time.time())
                LOG.info("[autotag-batch] Arrêt manuel.")
                return

            with AUTOTAG_BATCH_LOCK:
                AUTOTAG_BATCH["status"] = "running"
                AUTOTAG_BATCH["current"] = vid_item.get("name", vid_item["id"])
                AUTOTAG_BATCH["message"] = f"Traitement : {vid_item.get('name','')}"

            vid = vid_item["id"]
            name = vid_item.get("name", vid)

            try:
                ridx, rel = id_to_parts(vid)
                filepath = os.path.join(MEDIA_DIRS[ridx], rel) if ridx < len(MEDIA_DIRS) else None
                if not filepath or not os.path.isfile(filepath):
                    with AUTOTAG_BATCH_LOCK:
                        AUTOTAG_BATCH["skipped"] += 1
                        AUTOTAG_BATCH["done"] += 1
                    _autotag_log({"ts": int(time.time()), "vid": vid, "name": name, "tags": [], "error": "file_not_found"})
                    continue

                # Pick three timestamps: near the start, the middle, near the end.
                duration = get_video_duration(filepath)
                if not duration or duration <= 0:
                    # Unknown duration: assume one minute.
                    duration = 60.0
                t1 = 7.0
                t2 = duration / 2.0
                t3 = max(0.0, duration - 11.0)
                if duration < 18:
                    # Short clip: fixed offsets would overlap, use proportions.
                    t1, t2, t3 = duration * 0.2, duration * 0.5, duration * 0.8

                frames_b64 = []
                for t in (t1, t2, t3):
                    try:
                        r = subprocess.run(
                            ["ffmpeg", "-ss", str(t), "-i", filepath, "-frames:v", "1",
                             "-vf", "scale=512:-1", "-f", "image2", "-q:v", "3", "pipe:1"],
                            capture_output=True, timeout=30
                        )
                    except Exception as frame_err:
                        # Best effort: one failed frame must not abort the
                        # video, but leave a trace instead of hiding it.
                        LOG.warning("[autotag-batch] ffmpeg failed on %s at t=%.1fs: %s", name, t, frame_err)
                        continue
                    # Ignore empty / implausibly small outputs.
                    if r.stdout and len(r.stdout) > 100:
                        frames_b64.append(base64.b64encode(r.stdout).decode("ascii"))

                if not frames_b64:
                    with AUTOTAG_BATCH_LOCK:
                        AUTOTAG_BATCH["skipped"] += 1
                        AUTOTAG_BATCH["done"] += 1
                    _autotag_log({"ts": int(time.time()), "vid": vid, "name": name, "tags": [], "error": "ffmpeg_failed"})
                    continue

                # AI call
                engine = get_ai_engine()
                prompt = (
                    "Analyze these video frames. Return ONLY a JSON array of maximum 5 descriptive tags "
                    "(single words, lowercase, English). Tags should describe: scene type, setting, "
                    "visible objects, people characteristics, mood, activity, colors. "
                    "Example: [\"outdoor\", \"beach\", \"sunset\", \"woman\", \"running\"]. "
                    "Return ONLY the JSON array, nothing else."
                )

                if engine == "ollama":
                    ollama_url = get_ollama_url().rstrip('/') + "/api/generate"
                    payload = json.dumps({
                        "model": get_ollama_model(), "prompt": prompt,
                        "images": frames_b64, "format": "json",
                        "stream": False, "options": {"temperature": 0.3}
                    }).encode("utf-8")
                    req = _urlreq.Request(ollama_url, data=payload, headers={"Content-Type": "application/json"})
                    with _urlreq.urlopen(req, timeout=120) as resp:
                        txt = json.loads(resp.read().decode()).get("response", "").strip()
                else:  # gemini
                    api_key = get_gemini_key()
                    if not api_key:
                        # Nothing can succeed without a key: abort the batch.
                        with AUTOTAG_BATCH_LOCK:
                            AUTOTAG_BATCH["status"] = "error"
                            AUTOTAG_BATCH["message"] = "Clé API manquante — batch interrompu"
                            AUTOTAG_BATCH["finished"] = int(time.time())
                        LOG.error("[autotag-batch] Clé API Gemini manquante, arrêt du batch.")
                        return
                    gemini_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}"
                    parts = [{"text": prompt}] + [{"inline_data": {"mime_type": "image/jpeg", "data": b}} for b in frames_b64]
                    payload = json.dumps({"contents": [{"parts": parts}], "generationConfig": {"temperature": 0.3, "maxOutputTokens": 200}}).encode()
                    req = _urlreq.Request(gemini_url, data=payload, headers={"Content-Type": "application/json"})
                    with _urlreq.urlopen(req, timeout=30) as resp:
                        resp_j = json.loads(resp.read().decode())
                    txt = resp_j["candidates"][0]["content"]["parts"][0]["text"].strip()

                # Models sometimes wrap the JSON in a Markdown code fence.
                if txt.startswith("```"):
                    txt = txt.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
                tags_raw = json.loads(txt)

                if isinstance(tags_raw, dict):
                    # NOTE(review): with "format": "json" a model may answer
                    # with an object such as {"tags": [...]}; take the first
                    # list value rather than iterating over the keys.
                    tags_raw = next((v for v in tags_raw.values() if isinstance(v, list)), [])
                elif not isinstance(tags_raw, list):
                    # A bare string would otherwise be split into characters.
                    raise ValueError(f"unexpected AI response type: {type(tags_raw).__name__}")

                # Filter and save
                banned = get_banned_tags()
                tags = []
                seen = set()
                for t in tags_raw:
                    if not isinstance(t, str):
                        continue
                    ct = canon_tag(t, banned)
                    if ct and ct not in seen and len(ct) <= 40:
                        seen.add(ct)
                        tags.append(ct)
                    if len(tags) >= 5:
                        break

                if tags:
                    # Re-read the state so edits made since the batch started are kept.
                    st2 = read_state()
                    ut2 = st2.get("utags", {}) or {}
                    cur = set(canon_tag(x) for x in (ut2.get(vid) or []))
                    cur.update(tags)
                    cur.discard("")
                    ut2[vid] = sorted(cur)[:20]
                    st2["utags"] = ut2
                    write_state(st2)
                    _log_event("autotag", vid=vid, name=name, tags=tags)

                LOG.info("[autotag-batch] %s → %s", name, tags or "(aucun tag)")
                _autotag_log({"ts": int(time.time()), "vid": vid, "name": name, "tags": tags, "error": None})

                with AUTOTAG_BATCH_LOCK:
                    AUTOTAG_BATCH["done"] += 1

                # Short pause between calls so the API is not saturated;
                # waiting on the stop event lets a stop request cut it short.
                _AUTOTAG_STOP_EV.wait(1.5)

            except Exception as e:
                LOG.warning("[autotag-batch] Erreur sur %s : %s", name, e)
                _autotag_log({"ts": int(time.time()), "vid": vid, "name": name, "tags": [], "error": str(e)})
                with AUTOTAG_BATCH_LOCK:
                    AUTOTAG_BATCH["errors"] += 1
                    AUTOTAG_BATCH["done"] += 1
                _AUTOTAG_STOP_EV.wait(2)

        with AUTOTAG_BATCH_LOCK:
            AUTOTAG_BATCH["status"] = "done"
            AUTOTAG_BATCH["current"] = ""
            AUTOTAG_BATCH["message"] = f"Terminé : {AUTOTAG_BATCH['done']} traités, {AUTOTAG_BATCH['errors']} erreurs"
            AUTOTAG_BATCH["finished"] = int(time.time())
        LOG.info("[autotag-batch] Terminé.")

    except Exception as e:
        LOG.exception("[autotag-batch] Erreur critique : %s", e)
        with AUTOTAG_BATCH_LOCK:
            AUTOTAG_BATCH["status"] = "error"
            AUTOTAG_BATCH["message"] = f"Erreur critique : {e}"
            AUTOTAG_BATCH["finished"] = int(time.time())
    finally:
        # Only clear the handle if it still refers to this worker, so a
        # newer worker's handle is never wiped by an older one exiting.
        if _AUTOTAG_THREAD is threading.current_thread():
            _AUTOTAG_THREAD = None
1041+
1042+
1043+
@app.route("/api/autotag/batch/status", methods=["GET"])
def api_autotag_batch_status():
    """Return a snapshot of the batch progress plus the 20 latest journal entries.

    Responds 401 when ``auth_required()`` is falsy. The journal slice is
    returned newest first.
    """
    if auth_required():
        with AUTOTAG_BATCH_LOCK:
            snapshot = AUTOTAG_BATCH.copy()
        with AUTOTAG_IA_LOG_LOCK:
            recent = AUTOTAG_IA_LOG[-20:]
        # The slice is already a private copy; flip it to newest-first.
        recent.reverse()
        return jsonify(ok=True, batch=snapshot, log=recent)
    return jsonify(ok=False, error="auth"), 401
1053+
1054+
1055+
@app.route("/api/autotag/batch/start", methods=["POST"])
def api_autotag_batch_start():
    """Start the batch auto-tag worker in a daemon thread.

    JSON body (optional): ``{"reset_existing": bool}``; when true every
    video is processed instead of only the untagged ones.

    Responses:
        401 when ``auth_required()`` is falsy.
        400 ``no_api_key`` when no Gemini key is set and the engine is not Ollama.
        409 ``already_running`` while a previous worker thread is still alive
            (running, paused, or finishing its current item after a stop).
        200 ``status="running"`` once the worker has been started.
    """
    global _AUTOTAG_THREAD
    if not auth_required():
        return jsonify(ok=False, error="auth"), 401
    if not get_gemini_key() and get_ai_engine() != "ollama":
        return jsonify(ok=False, error="no_api_key", message="Aucune clé API configurée"), 400
    data = request.get_json(force=True, silent=True) or {}
    if not isinstance(data, dict):
        # A JSON array/scalar body has no options to read.
        data = {}
    reset_existing = bool(data.get("reset_existing", False))

    # The liveness check and the thread launch happen under one lock so two
    # concurrent requests cannot both start a worker. Liveness of the thread
    # (not the status string) is the criterion: the status is "paused" or
    # "idle" while a worker may still be alive.
    with AUTOTAG_BATCH_LOCK:
        worker = _AUTOTAG_THREAD
        if worker is not None and worker.is_alive():
            return jsonify(ok=False, error="already_running"), 409
        AUTOTAG_BATCH["status"] = "running"

        _AUTOTAG_STOP_EV.clear()
        _AUTOTAG_PAUSE_EV.set()

        _AUTOTAG_THREAD = threading.Thread(
            target=_autotag_batch_worker, args=(reset_existing,), daemon=True
        )
        _AUTOTAG_THREAD.start()
    LOG.info("[autotag-batch] Démarré par l'utilisateur (reset_existing=%s)", reset_existing)
    return jsonify(ok=True, status="running")
1079+
1080+
1081+
@app.route("/api/autotag/batch/pause", methods=["POST"])
def api_autotag_batch_pause():
    """Pause the running batch; the worker halts before its next video.

    Responses:
        401 when ``auth_required()`` is falsy.
        409 ``not_running`` when there is no active batch to pause, so the
            shared status is never switched to "paused" with no worker behind it.
        200 ``status="paused"`` otherwise.
    """
    if not auth_required():
        return jsonify(ok=False, error="auth"), 401
    with AUTOTAG_BATCH_LOCK:
        worker = _AUTOTAG_THREAD
        active = (
            worker is not None
            and worker.is_alive()
            and not _AUTOTAG_STOP_EV.is_set()
            and AUTOTAG_BATCH["status"] in ("running", "paused")
        )
        if not active:
            return jsonify(ok=False, error="not_running"), 409
        # Clearing the event is what makes the worker wait.
        _AUTOTAG_PAUSE_EV.clear()
        AUTOTAG_BATCH["status"] = "paused"
        AUTOTAG_BATCH["paused_at"] = int(time.time())
        AUTOTAG_BATCH["message"] = "En pause"
    LOG.info("[autotag-batch] Mis en pause.")
    return jsonify(ok=True, status="paused")
1092+
1093+
1094+
@app.route("/api/autotag/batch/resume", methods=["POST"])
def api_autotag_batch_resume():
    """Resume a paused batch.

    Responses:
        401 when ``auth_required()`` is falsy.
        409 ``not_running`` when no worker is active. Without this guard the
            status would read "running" with no thread behind it, and the
            start endpoint rejects new batches while the status is "running".
        200 ``status="running"`` otherwise.
    """
    if not auth_required():
        return jsonify(ok=False, error="auth"), 401
    with AUTOTAG_BATCH_LOCK:
        worker = _AUTOTAG_THREAD
        active = (
            worker is not None
            and worker.is_alive()
            and not _AUTOTAG_STOP_EV.is_set()
            and AUTOTAG_BATCH["status"] in ("running", "paused")
        )
        if not active:
            return jsonify(ok=False, error="not_running"), 409
        # Setting the event releases the worker's pause wait.
        _AUTOTAG_PAUSE_EV.set()
        AUTOTAG_BATCH["status"] = "running"
        AUTOTAG_BATCH["message"] = "Reprise…"
    LOG.info("[autotag-batch] Repris.")
    return jsonify(ok=True, status="running")
1104+
1105+
1106+
@app.route("/api/autotag/batch/stop", methods=["POST"])
def api_autotag_batch_stop():
    """Request the batch worker to stop and mark the batch as idle.

    Responds 401 when ``auth_required()`` is falsy; otherwise raises the
    stop flag, releases any pause, and records the stop in ``AUTOTAG_BATCH``.
    """
    if auth_required():
        _AUTOTAG_STOP_EV.set()
        # Release the pause gate so a paused worker can notice the stop.
        _AUTOTAG_PAUSE_EV.set()
        stopped_at = int(time.time())
        with AUTOTAG_BATCH_LOCK:
            AUTOTAG_BATCH.update({
                "status": "idle",
                "message": "Arrêté",
                "finished": stopped_at,
            })
        LOG.info("[autotag-batch] Arrêt demandé.")
        return jsonify(ok=True, status="idle")
    return jsonify(ok=False, error="auth"), 401
1118+
1119+
1120+
8181121
# --- Journal/helper & storage files ---
8191122
SCAN_CACHE_FILE = os.path.join(DATA_DIR if 'DATA_DIR' in globals() else '/data', 'scan_cache.json')
8201123
THUMB_PROGRESS = {"total":0,"done":0,"running":False,"last_error":""}

docker-compose.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ services:
2121
MINI_PASS: ${MINI_PASS}
2222
# En console : openssl rand -hex 32
2323
SECRET_KEY:
24+
# Gemini auto-tagging (clé API Google — https://aistudio.google.com/apikey)
25+
GEMINI_API_KEY: ${GEMINI_API_KEY}
26+
# Ollama (alternative locale à Gemini)
27+
# OLLAMA_URL: http://host.docker.internal:11434
28+
# OLLAMA_MODEL: llava
2429
# Liste noire tags (les mots de moins de 3 lettres sont automatiquement bannis)
2530
MINI_BANNED_TAGS: >
2631
and,the,source,video,videos,vid,vids,film,movie,part,

0 commit comments

Comments
 (0)