Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""43_add_blocknote_fields_to_documents
Revision ID: 43
Revises: 42
Create Date: 2025-11-30
Adds fields for live document editing:
- blocknote_document: JSONB editor state
- content_needs_reindexing: Flag for regenerating chunks/summary
- last_edited_at: Last edit timestamp
"""

from collections.abc import Sequence

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "43"
down_revision: str | None = "42"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Upgrade schema - Add BlockNote fields and trigger population task.

    Adds three columns to ``documents``:
      - ``blocknote_document``: JSONB editor state, NULL until first edited
      - ``content_needs_reindexing``: flag for the background reindex task
      - ``last_edited_at``: timezone-aware timestamp of the last edit
    Then best-effort queues a Celery task to backfill ``blocknote_document``
    for existing rows; a queueing failure does not fail the migration.
    """

    # Add the columns
    op.add_column(
        "documents",
        sa.Column(
            "blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
        ),
    )
    # server_default=false gives existing rows a concrete value so the
    # NOT NULL constraint can be applied in a single step.
    op.add_column(
        "documents",
        sa.Column(
            "content_needs_reindexing",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )
    op.add_column(
        "documents",
        sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
    )

    # Trigger the Celery task to populate blocknote_document for existing documents
    # NOTE(review): the task is queued while the migration transaction may
    # still be open, so a worker could start before the new columns are
    # committed/visible — confirm the task retries or tolerates that race.
    try:
        # Imported lazily so the migration does not hard-depend on the app
        # package being importable in every environment.
        from app.tasks.celery_tasks.blocknote_migration_tasks import (
            populate_blocknote_for_documents_task,
        )

        # Queue the task to run asynchronously
        populate_blocknote_for_documents_task.apply_async()
        print(
            "✓ Queued Celery task to populate blocknote_document for existing documents"
        )
    except Exception as e:
        # If Celery is not available or task queueing fails, log but don't fail the migration
        print(f"⚠ Warning: Could not queue blocknote population task: {e}")
        print(" You can manually trigger it later with:")
        print(
            " celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task"
        )


def downgrade() -> None:
    """Downgrade schema - drop the BlockNote editing columns from documents."""
    # Drop in reverse order of their creation in upgrade(); the columns are
    # independent, so each drop stands on its own.
    for column_name in (
        "last_edited_at",
        "content_needs_reindexing",
        "blocknote_document",
    ):
        op.drop_column("documents", column_name)
2 changes: 2 additions & 0 deletions surfsense_backend/app/celery_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def parse_schedule_interval(interval: str) -> dict:
"app.tasks.celery_tasks.podcast_tasks",
"app.tasks.celery_tasks.connector_tasks",
"app.tasks.celery_tasks.schedule_checker_task",
"app.tasks.celery_tasks.blocknote_migration_tasks",
"app.tasks.celery_tasks.document_reindex_tasks",
],
)

Expand Down
13 changes: 12 additions & 1 deletion surfsense_backend/app/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
UniqueConstraint,
text,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, declared_attr, relationship

Expand Down Expand Up @@ -343,6 +343,17 @@ class Document(BaseModel, TimestampMixin):
# Unique, indexed identifier hash used to deduplicate documents
# (nullable — presumably for legacy rows; verify against ingestion code).
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
# Dense embedding vector; dimension comes from the configured embedding model.
embedding = Column(Vector(config.embedding_model_instance.dimension))

# BlockNote live editing state (NULL when never edited)
blocknote_document = Column(JSONB, nullable=True)

# blocknote background reindex flag: set True on save so a background task
# regenerates chunks/summary; server_default keeps pre-migration rows valid.
content_needs_reindexing = Column(
    Boolean, nullable=False, default=False, server_default=text("false")
)

# Track when blocknote document was last edited (timezone-aware; NULL if never)
last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True)

# Owning search space; ON DELETE CASCADE removes documents with their space.
search_space_id = Column(
    Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
)
Expand Down
2 changes: 2 additions & 0 deletions surfsense_backend/app/routes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
)
from .chats_routes import router as chats_router
from .documents_routes import router as documents_router
from .editor_routes import router as editor_router
from .google_calendar_add_connector_route import (
router as google_calendar_add_connector_router,
)
Expand All @@ -23,6 +24,7 @@

router.include_router(search_spaces_router)
router.include_router(rbac_router) # RBAC routes for roles, members, invites
router.include_router(editor_router)
router.include_router(documents_router)
router.include_router(podcasts_router)
router.include_router(chats_router)
Expand Down
140 changes: 140 additions & 0 deletions surfsense_backend/app/routes/editor_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""
Editor routes for BlockNote document editing.
"""

from datetime import UTC, datetime
from typing import Any

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Document, SearchSpace, User, get_async_session
from app.users import current_active_user

router = APIRouter()


@router.get("/documents/{document_id}/editor-content")
async def get_editor_content(
    document_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Get document content for editing.

    Returns the BlockNote JSON document. If ``blocknote_document`` is NULL,
    lazily migrates the document: reconstructs markdown from its chunks,
    converts it to BlockNote format, and persists the result.

    Raises:
        HTTPException 404: document not found or not owned by the user.
        HTTPException 400: document has no chunks or empty content.
        HTTPException 500: markdown-to-BlockNote conversion failed.
    """
    from sqlalchemy.orm import selectinload

    # Ownership is enforced by joining through the user's SearchSpace.
    result = await session.execute(
        select(Document)
        .options(selectinload(Document.chunks))
        .join(SearchSpace)
        .filter(Document.id == document_id, SearchSpace.user_id == user.id)
    )
    document = result.scalars().first()

    if not document:
        raise HTTPException(status_code=404, detail="Document not found")

    # Explicit None check (not truthiness): an empty-but-valid BlockNote
    # document (e.g. []) is falsy and must NOT re-trigger the lazy migration
    # below, which would silently overwrite the user's cleared editor state.
    if document.blocknote_document is not None:
        return {
            "document_id": document.id,
            "title": document.title,
            "blocknote_document": document.blocknote_document,
            "last_edited_at": document.last_edited_at.isoformat()
            if document.last_edited_at
            else None,
        }

    # Lazy migration: Try to generate blocknote_document from chunks
    from app.utils.blocknote_converter import convert_markdown_to_blocknote

    # Chunks are ordered by primary key — assumed to match original
    # document order (TODO confirm against the chunking pipeline).
    chunks = sorted(document.chunks, key=lambda c: c.id)

    if not chunks:
        raise HTTPException(
            status_code=400,
            detail="This document has no chunks and cannot be edited. Please re-upload to enable editing.",
        )

    # Reconstruct markdown from chunks
    markdown_content = "\n\n".join(chunk.content for chunk in chunks)

    if not markdown_content.strip():
        raise HTTPException(
            status_code=400,
            detail="This document has empty content and cannot be edited.",
        )

    # Convert to BlockNote
    blocknote_json = await convert_markdown_to_blocknote(markdown_content)

    if not blocknote_json:
        raise HTTPException(
            status_code=500,
            detail="Failed to convert document to editable format. Please try again later.",
        )

    # Save the generated blocknote_document (lazy migration). The content
    # was derived from existing chunks, so no reindexing is needed and the
    # document has not been edited yet.
    document.blocknote_document = blocknote_json
    document.content_needs_reindexing = False
    document.last_edited_at = None
    await session.commit()

    return {
        "document_id": document.id,
        "title": document.title,
        "blocknote_document": blocknote_json,
        "last_edited_at": None,
    }
}


@router.post("/documents/{document_id}/save")
async def save_document(
    document_id: int,
    data: dict[str, Any],
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Save BlockNote document and trigger reindexing.

    Called when user clicks 'Save & Exit'. Persists the editor state,
    stamps the edit time, flags the document for reindexing, and queues
    the Celery reindex task only after the transaction commits.

    Raises:
        HTTPException 404: document not found or not owned by the user.
        HTTPException 400: request body has no ``blocknote_document`` key.
    """
    from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task

    # Verify ownership
    result = await session.execute(
        select(Document)
        .join(SearchSpace)
        .filter(Document.id == document_id, SearchSpace.user_id == user.id)
    )
    document = result.scalars().first()

    if not document:
        raise HTTPException(status_code=404, detail="Document not found")

    # Explicit None check (not truthiness): an empty-but-present BlockNote
    # document (e.g. [] after the user cleared all content) is a valid save
    # and must not be rejected with a 400.
    blocknote_document = data.get("blocknote_document")
    if blocknote_document is None:
        raise HTTPException(status_code=400, detail="blocknote_document is required")

    # Capture the timestamp in a local so we never touch ORM attributes
    # after commit() — expired attributes can raise in an async session.
    edited_at = datetime.now(UTC)

    # Save BlockNote document and mark it stale for search/summary.
    document.blocknote_document = blocknote_document
    document.last_edited_at = edited_at
    document.content_needs_reindexing = True

    await session.commit()

    # Queue reindex task after commit so the worker sees the new state.
    reindex_document_task.delay(document_id, str(user.id))

    return {
        "status": "saved",
        "document_id": document_id,
        "message": "Document saved and will be reindexed in the background",
        "last_edited_at": edited_at.isoformat(),
    }
Loading
Loading