Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 3 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

# Use specific Python version for consistency
ARG PYTHON_VERSION=3.12
# Use Ubuntu 24.04 for GLIBC 2.39 compatibility (required by c2patool)
ARG PYTHON_IMAGE=ubuntu:24.04
ARG PYTHON_IMAGE=python:${PYTHON_VERSION}-slim-bookworm

# Stage 1: Builder - Install dependencies with uv
FROM ghcr.io/astral-sh/uv:python${PYTHON_VERSION}-bookworm-slim AS builder
Expand Down Expand Up @@ -38,29 +37,16 @@ COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked --all-packages

# Make c2patool executable
RUN chmod +x /app/packages/gptzero/resources/c2patool/v0.16.1/Linux/c2patool || true

# Stage 2: Final runtime image without uv
FROM ${PYTHON_IMAGE}

ARG PYTHON_VERSION

# Create non-root user for security
RUN groupadd --system --gid 999 appuser \
&& useradd --system --gid 999 --uid 999 --create-home appuser

# Install Python and curl for healthchecks
# Install curl for healthchecks
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
curl \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
&& mkdir -p /usr/local/bin \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python3 \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python \
&& apt-get install -y --no-install-recommends curl \
&& rm -rf /var/lib/apt/lists/*

# Copy the application and virtual environment from builder
Expand Down
1 change: 1 addition & 0 deletions packages/gptzero/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ license = {text = "MIT"}
keywords = ["content-authenticity", "image-verification", "metadata", "c2pa", "exif"]

dependencies = [
"c2pa-python>=0.27.0,<1.0.0",
"exif>=1.0.0,<2",
]

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
60 changes: 20 additions & 40 deletions packages/gptzero/src/gptzero/handlers/c2pa.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
"""C2PA metadata handler."""

import io
import json
import subprocess
import tempfile
from pathlib import Path

from c2pa import C2paError, Reader

from gptzero.handlers.base import MetadataHandler
from gptzero.models import C2PAMetadata
from gptzero.utils import get_c2pa_binary_path, get_file_extension


class C2PAHandler(MetadataHandler):
"""Handler for C2PA metadata extraction."""

def __init__(self, binary_path: Path | None = None):
"""
Initialize C2PA handler.

Args:
binary_path: Optional path to c2patool binary. If None, will auto-detect.
"""
self.binary_path = binary_path or get_c2pa_binary_path()
"""Handler for C2PA metadata extraction using c2pa-python bindings."""

def extract(
self, data: bytes, mime_type: str
Expand All @@ -35,38 +25,28 @@ def extract(
Returns:
Tuple of (success, C2PAMetadata, error_message)
"""
if self.binary_path is None:
return False, None, "Unsupported platform or missing binary"

extension = get_file_extension(mime_type)
if extension is None:
return False, None, f"Unsupported MIME type: {mime_type}"

# Create a temporary file to save the image
with tempfile.NamedTemporaryFile(suffix=extension) as temp_file:
temp_file.write(data)
temp_file.flush()
temp_file_path = temp_file.name
try:
# Create a BytesIO stream from the image data
stream = io.BytesIO(data)

# Run the c2patool binary
result = subprocess.run(
[str(self.binary_path), "-d", temp_file_path],
capture_output=True,
text=True,
check=False,
)
# Use c2pa Reader to extract manifest
reader = Reader(mime_type, stream)
manifest_json = reader.json()

if result.returncode != 0:
stderr_stripped = result.stderr.strip()
if stderr_stripped == "Error: No claim found":
if manifest_json is None:
return True, None, None # Success, but no C2PA data
return False, None, f"Error checking C2PA: {stderr_stripped}"

# Parse the output
try:
manifest = json.loads(result.stdout)
# Parse the manifest JSON
manifest = json.loads(manifest_json)
c2pa_metadata = C2PAMetadata.from_manifest(manifest)
return True, c2pa_metadata, None

except C2paError as e:
# Check if it's a ManifestNotFound error (no C2PA data)
error_str = str(e)
if "ManifestNotFound" in error_str or "no JUMBF data found" in error_str:
return True, None, None # Success, but no C2PA data
return False, None, f"Error checking C2PA: {error_str}"
except json.JSONDecodeError:
return False, None, "C2PA metadata found but cannot be decoded"
except Exception as e:
Expand Down
85 changes: 54 additions & 31 deletions packages/gptzero/src/gptzero/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,56 +68,79 @@ def from_manifest(cls, manifest: dict[str, Any]) -> "C2PAMetadata":
"""
Parse a C2PA manifest dictionary and extract relevant metadata.

Uses c2pa-python library format.

Args:
manifest: Dictionary containing C2PA manifest data
manifest: Dictionary containing C2PA manifest data from c2pa-python

Returns:
C2PAMetadata object with parsed information
"""
active_manifest_id = manifest.get("active_manifest")
active_manifest = manifest.get("manifests", {}).get(active_manifest_id, {})
claim = active_manifest.get("claim", {})
claim_generator_info = claim.get("claim_generator_info", {})
instance_id = claim.get("instanceID", "Unknown")
title = claim.get("dc:title", "Unknown")

signature_info = active_manifest.get("signature", {})
return cls._from_c2pa_python_format(manifest, active_manifest)

@classmethod
def _from_c2pa_python_format(
cls, manifest: dict[str, Any], active_manifest: dict[str, Any]
) -> "C2PAMetadata":
"""Parse manifest from c2pa-python library format."""
# Extract basic metadata from active manifest
instance_id = active_manifest.get("instance_id", "Unknown")
title = active_manifest.get("title", "Unknown")

# Get signature info
signature_info = active_manifest.get("signature_info", {})
issuer = signature_info.get("issuer", "Unknown")

assertion_store = active_manifest.get("assertion_store", {})
assertion_manifest_id = (
assertion_store.get("c2pa.ingredient.v3", {})
.get("activeManifest", {})
.get("url", "")
.split("/")[-1]
)
# Get claim generator info
claim_generator_info = active_manifest.get("claim_generator_info", [])
generator_name = "Unknown"
if claim_generator_info and isinstance(claim_generator_info, list):
generator_name = claim_generator_info[0].get("name", "Unknown")

# Extract software agents and digital source type from ingredients
software_agents: list[SoftwareAgent] = []
digital_source_type: str | None = None

assertion_manifest = manifest.get("manifests", {}).get(assertion_manifest_id, {})

if assertion_manifest:
assertion_assertion_store = assertion_manifest.get("assertion_store", {})

# Extract software agents and digital source type from assertions
actions = assertion_assertion_store.get("c2pa.actions.v2", {}).get("actions", [])
for action in actions:
agent_name = action.get("softwareAgent", {}).get("name")
if agent_name and agent_name not in [sa.name for sa in software_agents]:
action_type = action.get("action", "").replace("c2pa.", "")
software_agents.append(SoftwareAgent(name=agent_name, action=action_type))

if "digitalSourceType" in action:
digital_source_type = action.get("digitalSourceType", "")
if "trainedAlgorithmicMedia" in digital_source_type:
digital_source_type = "This content was generated with an AI tool"
ingredients = active_manifest.get("ingredients", [])
for ingredient in ingredients:
ingredient_manifest_id = ingredient.get("active_manifest")
if ingredient_manifest_id:
ingredient_manifest = manifest.get("manifests", {}).get(
ingredient_manifest_id, {}
)

# Look for actions in ingredient assertions
assertions = ingredient_manifest.get("assertions", [])
for assertion in assertions:
if assertion.get("label") in ("c2pa.actions", "c2pa.actions.v2"):
actions_data = assertion.get("data", {})
actions = actions_data.get("actions", [])

for action in actions:
agent_name = action.get("softwareAgent", {}).get("name")
if agent_name and agent_name not in [
sa.name for sa in software_agents
]:
action_type = action.get("action", "").replace("c2pa.", "")
software_agents.append(
SoftwareAgent(name=agent_name, action=action_type)
)

if "digitalSourceType" in action:
dst = action.get("digitalSourceType", "")
if "trainedAlgorithmicMedia" in dst:
digital_source_type = (
"This content was generated with an AI tool"
)

return cls(
instance_id=instance_id,
title=title,
issuer=issuer,
generator_name=claim_generator_info.get("name", "Unknown"),
generator_name=generator_name,
digital_source_type=digital_source_type,
software_agents=software_agents,
)
Expand Down
96 changes: 0 additions & 96 deletions packages/gptzero/src/gptzero/utils.py

This file was deleted.

Loading