Folders, Persistent Import Progress Tracking & UX Enhancements (#3841)

Co-authored-by: Jonathan Dobson <jon.m.dobson@gmail.com>
Celal Zamanoglu
2025-11-05 18:37:18 +03:00
committed by GitHub
parent fcc3f30ba4
commit 75948053b9
32 changed files with 2886 additions and 538 deletions

View File

@@ -27,6 +27,7 @@ from skyvern.forge.sdk.db.models import (
CredentialModel,
CredentialParameterModel,
DebugSessionModel,
FolderModel,
OnePasswordCredentialParameterModel,
OrganizationAuthTokenModel,
OrganizationBitwardenCollectionModel,
@@ -1405,6 +1406,7 @@ class AgentDB:
cache_key: str | None = None,
run_sequentially: bool = False,
sequential_key: str | None = None,
folder_id: str | None = None,
) -> Workflow:
async with self.Session() as session:
workflow = WorkflowModel(
@@ -1427,6 +1429,7 @@ class AgentDB:
cache_key=cache_key or DEFAULT_SCRIPT_RUN_ID,
run_sequentially=run_sequentially,
sequential_key=sequential_key,
folder_id=folder_id,
)
if workflow_permanent_id:
workflow.workflow_permanent_id = workflow_permanent_id
@@ -1605,14 +1608,15 @@ class AgentDB:
only_saved_tasks: bool = False,
only_workflows: bool = False,
search_key: str | None = None,
folder_id: str | None = None,
statuses: list[WorkflowStatus] | None = None,
) -> list[Workflow]:
"""
Get all workflows with the latest version for the organization.
Search semantics:
- If `search_key` is provided, its value is used as a unified search term for both
`workflows.title` and workflow parameter metadata (key, description, and default_value).
- If `search_key` is provided, its value is used as a unified search term for
`workflows.title`, `folders.title`, and workflow parameter metadata (key, description, and default_value).
- If `search_key` is not provided, no search filtering is applied.
- Parameter metadata search excludes soft-deleted parameter rows across parameter tables.
"""
@@ -1635,11 +1639,19 @@ class AgentDB:
)
.subquery()
)
main_query = select(WorkflowModel).join(
subquery,
(WorkflowModel.organization_id == subquery.c.organization_id)
& (WorkflowModel.workflow_permanent_id == subquery.c.workflow_permanent_id)
& (WorkflowModel.version == subquery.c.max_version),
main_query = (
select(WorkflowModel)
.join(
subquery,
(WorkflowModel.organization_id == subquery.c.organization_id)
& (WorkflowModel.workflow_permanent_id == subquery.c.workflow_permanent_id)
& (WorkflowModel.version == subquery.c.max_version),
)
.outerjoin(
FolderModel,
(WorkflowModel.folder_id == FolderModel.folder_id)
& (FolderModel.organization_id == WorkflowModel.organization_id),
)
)
if only_saved_tasks:
main_query = main_query.where(WorkflowModel.is_saved_task.is_(True))
@@ -1647,9 +1659,12 @@ class AgentDB:
main_query = main_query.where(WorkflowModel.is_saved_task.is_(False))
if statuses:
main_query = main_query.where(WorkflowModel.status.in_(statuses))
if folder_id:
main_query = main_query.where(WorkflowModel.folder_id == folder_id)
if search_key:
search_like = f"%{search_key}%"
title_like = WorkflowModel.title.ilike(search_like)
folder_title_like = FolderModel.title.ilike(search_like)
parameter_filters = [
# WorkflowParameterModel
@@ -1771,7 +1786,7 @@ class AgentDB:
)
),
]
main_query = main_query.where(or_(title_like, or_(*parameter_filters)))
main_query = main_query.where(or_(title_like, folder_title_like, or_(*parameter_filters)))
main_query = (
main_query.order_by(WorkflowModel.created_at.desc()).limit(page_size).offset(db_page * page_size)
)
@@ -1791,6 +1806,8 @@ class AgentDB:
version: int | None = None,
run_with: str | None = None,
cache_key: str | None = None,
status: str | None = None,
import_error: str | None = None,
) -> Workflow:
try:
async with self.Session() as session:
@@ -1812,6 +1829,10 @@ class AgentDB:
workflow.run_with = run_with
if cache_key is not None:
workflow.cache_key = cache_key
if status is not None:
workflow.status = status
if import_error is not None:
workflow.import_error = import_error
await session.commit()
await session.refresh(workflow)
return convert_to_workflow(workflow, self.debug_enabled)
@@ -1846,6 +1867,372 @@ class AgentDB:
await session.execute(update_deleted_at_query)
await session.commit()
async def create_folder(
self,
organization_id: str,
title: str,
description: str | None = None,
) -> FolderModel:
"""Create a new folder."""
try:
async with self.Session() as session:
folder = FolderModel(
organization_id=organization_id,
title=title,
description=description,
)
session.add(folder)
await session.commit()
await session.refresh(folder)
return folder
except SQLAlchemyError:
LOG.error("SQLAlchemyError in create_folder", exc_info=True)
raise
async def get_folders(
self,
organization_id: str,
page: int = 1,
page_size: int = 10,
search_query: str | None = None,
) -> list[FolderModel]:
"""Get all folders for an organization with pagination and optional search."""
try:
async with self.Session() as session:
stmt = (
select(FolderModel)
.filter_by(organization_id=organization_id)
.filter(FolderModel.deleted_at.is_(None))
)
if search_query:
search_pattern = f"%{search_query}%"
stmt = stmt.filter(
or_(
FolderModel.title.ilike(search_pattern),
FolderModel.description.ilike(search_pattern),
)
)
stmt = stmt.order_by(FolderModel.modified_at.desc())
stmt = stmt.offset((page - 1) * page_size).limit(page_size)
result = await session.execute(stmt)
return list(result.scalars().all())
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_folders", exc_info=True)
raise
async def get_folder(
self,
folder_id: str,
organization_id: str,
) -> FolderModel | None:
"""Get a folder by ID."""
try:
async with self.Session() as session:
stmt = (
select(FolderModel)
.filter_by(folder_id=folder_id, organization_id=organization_id)
.filter(FolderModel.deleted_at.is_(None))
)
result = await session.execute(stmt)
return result.scalar_one_or_none()
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_folder", exc_info=True)
raise
async def update_folder(
self,
folder_id: str,
organization_id: str,
title: str | None = None,
description: str | None = None,
) -> FolderModel | None:
"""Update a folder's title or description."""
try:
async with self.Session() as session:
stmt = (
select(FolderModel)
.filter_by(folder_id=folder_id, organization_id=organization_id)
.filter(FolderModel.deleted_at.is_(None))
)
result = await session.execute(stmt)
folder = result.scalar_one_or_none()
if not folder:
return None
if title is not None:
folder.title = title
if description is not None:
folder.description = description
folder.modified_at = datetime.utcnow()
await session.commit()
await session.refresh(folder)
return folder
except SQLAlchemyError:
LOG.error("SQLAlchemyError in update_folder", exc_info=True)
raise
async def get_workflow_permanent_ids_in_folder(
self,
folder_id: str,
organization_id: str,
) -> list[str]:
"""Get workflow permanent IDs (latest versions only) in a folder."""
try:
async with self.Session() as session:
# Subquery to get the latest version for each workflow
subquery = (
select(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
func.max(WorkflowModel.version).label("max_version"),
)
.where(WorkflowModel.organization_id == organization_id)
.where(WorkflowModel.deleted_at.is_(None))
.group_by(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
)
.subquery()
)
# Get workflow_permanent_ids where the latest version is in this folder
stmt = (
select(WorkflowModel.workflow_permanent_id)
.join(
subquery,
(WorkflowModel.organization_id == subquery.c.organization_id)
& (WorkflowModel.workflow_permanent_id == subquery.c.workflow_permanent_id)
& (WorkflowModel.version == subquery.c.max_version),
)
.where(WorkflowModel.folder_id == folder_id)
)
result = await session.execute(stmt)
return list(result.scalars().all())
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_workflow_permanent_ids_in_folder", exc_info=True)
raise
async def soft_delete_folder(
self,
folder_id: str,
organization_id: str,
delete_workflows: bool = False,
) -> bool:
"""Soft delete a folder. Optionally delete all workflows in the folder."""
try:
async with self.Session() as session:
# Check if folder exists
folder_stmt = (
select(FolderModel)
.filter_by(folder_id=folder_id, organization_id=organization_id)
.filter(FolderModel.deleted_at.is_(None))
)
folder_result = await session.execute(folder_stmt)
folder = folder_result.scalar_one_or_none()
if not folder:
return False
# If delete_workflows is True, delete all workflows in the folder
if delete_workflows:
# Get workflow permanent IDs in the folder (inlined from get_workflow_permanent_ids_in_folder so the lookup and the deletes share one session)
subquery = (
select(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
func.max(WorkflowModel.version).label("max_version"),
)
.where(WorkflowModel.organization_id == organization_id)
.where(WorkflowModel.deleted_at.is_(None))
.group_by(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
)
.subquery()
)
workflow_permanent_ids_stmt = (
select(WorkflowModel.workflow_permanent_id)
.join(
subquery,
(WorkflowModel.organization_id == subquery.c.organization_id)
& (WorkflowModel.workflow_permanent_id == subquery.c.workflow_permanent_id)
& (WorkflowModel.version == subquery.c.max_version),
)
.where(WorkflowModel.folder_id == folder_id)
)
result = await session.execute(workflow_permanent_ids_stmt)
workflow_permanent_ids = list(result.scalars().all())
# Soft delete all workflows with these permanent IDs in a single bulk update
if workflow_permanent_ids:
update_workflows_query = (
update(WorkflowModel)
.where(WorkflowModel.workflow_permanent_id.in_(workflow_permanent_ids))
.where(WorkflowModel.organization_id == organization_id)
.where(WorkflowModel.deleted_at.is_(None))
.values(deleted_at=datetime.utcnow())
)
await session.execute(update_workflows_query)
else:
# Just remove folder_id from all workflows in this folder
update_workflows_query = (
update(WorkflowModel)
.where(WorkflowModel.folder_id == folder_id)
.where(WorkflowModel.organization_id == organization_id)
.values(folder_id=None, modified_at=datetime.utcnow())
)
await session.execute(update_workflows_query)
# Soft delete the folder
folder.deleted_at = datetime.utcnow()
await session.commit()
return True
except SQLAlchemyError:
LOG.error("SQLAlchemyError in soft_delete_folder", exc_info=True)
raise
async def get_folder_workflow_count(
self,
folder_id: str,
organization_id: str,
) -> int:
"""Get the count of workflows (latest versions only) in a folder."""
try:
async with self.Session() as session:
# Subquery to get the latest version for each workflow (same pattern as get_workflows_by_organization_id)
subquery = (
select(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
func.max(WorkflowModel.version).label("max_version"),
)
.where(WorkflowModel.organization_id == organization_id)
.where(WorkflowModel.deleted_at.is_(None))
.group_by(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
)
.subquery()
)
# Count workflows where the latest version is in this folder
stmt = (
select(func.count(WorkflowModel.workflow_permanent_id))
.join(
subquery,
(WorkflowModel.organization_id == subquery.c.organization_id)
& (WorkflowModel.workflow_permanent_id == subquery.c.workflow_permanent_id)
& (WorkflowModel.version == subquery.c.max_version),
)
.where(WorkflowModel.folder_id == folder_id)
)
result = await session.execute(stmt)
return result.scalar_one()
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_folder_workflow_count", exc_info=True)
raise
async def get_folder_workflow_counts_batch(
self,
folder_ids: list[str],
organization_id: str,
) -> dict[str, int]:
"""Get workflow counts for multiple folders in a single query."""
try:
async with self.Session() as session:
# Subquery to get the latest version for each workflow
subquery = (
select(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
func.max(WorkflowModel.version).label("max_version"),
)
.where(WorkflowModel.organization_id == organization_id)
.where(WorkflowModel.deleted_at.is_(None))
.group_by(
WorkflowModel.organization_id,
WorkflowModel.workflow_permanent_id,
)
.subquery()
)
# Count workflows grouped by folder_id
stmt = (
select(
WorkflowModel.folder_id,
func.count(WorkflowModel.workflow_permanent_id).label("count"),
)
.join(
subquery,
(WorkflowModel.organization_id == subquery.c.organization_id)
& (WorkflowModel.workflow_permanent_id == subquery.c.workflow_permanent_id)
& (WorkflowModel.version == subquery.c.max_version),
)
.where(WorkflowModel.folder_id.in_(folder_ids))
.group_by(WorkflowModel.folder_id)
)
result = await session.execute(stmt)
rows = result.all()
# Convert to dict; folders with no workflows are simply absent from the rows, so callers must default missing keys to 0 (see the .get(folder_id, 0) at the call site)
return {row.folder_id: row.count for row in rows}
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_folder_workflow_counts_batch", exc_info=True)
raise
async def update_workflow_folder(
self,
workflow_permanent_id: str,
organization_id: str,
folder_id: str | None,
) -> Workflow | None:
"""Update folder assignment for the latest version of a workflow."""
try:
# Get the latest version of the workflow
latest_workflow = await self.get_workflow_by_permanent_id(
workflow_permanent_id=workflow_permanent_id,
organization_id=organization_id,
)
if not latest_workflow:
return None
async with self.Session() as session:
# Validate that the folder exists within this organization when a folder_id is provided
if folder_id:
stmt = (
select(FolderModel.folder_id)
.where(FolderModel.folder_id == folder_id)
.where(FolderModel.organization_id == organization_id)
.where(FolderModel.deleted_at.is_(None))
)
if (await session.scalar(stmt)) is None:
raise ValueError(f"Folder {folder_id} not found")
workflow_model = await session.get(WorkflowModel, latest_workflow.workflow_id)
if workflow_model:
workflow_model.folder_id = folder_id
workflow_model.modified_at = datetime.utcnow()
# Update folder's modified_at in the same transaction
if folder_id:
folder_model = await session.get(FolderModel, folder_id)
if folder_model:
folder_model.modified_at = datetime.utcnow()
await session.commit()
await session.refresh(workflow_model)
return convert_to_workflow(workflow_model, self.debug_enabled)
return None
except SQLAlchemyError:
LOG.error("SQLAlchemyError in update_workflow_folder", exc_info=True)
raise
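Taken together, the methods above form a small folder CRUD surface. A hedged usage sketch (db is an AgentDB instance; the workflow permanent ID is illustrative):

async def demo_folder_lifecycle(db, organization_id: str) -> None:
    folder = await db.create_folder(
        organization_id=organization_id,
        title="Invoicing",
        description="Workflows that touch the billing portal",
    )
    # Move the latest version of a workflow into the new folder.
    await db.update_workflow_folder(
        workflow_permanent_id="wpid_123",  # illustrative
        organization_id=organization_id,
        folder_id=folder.folder_id,
    )
    # Batch counts only contain folders that have workflows; a missing
    # key means zero, hence the .get(folder_id, 0) at call sites.
    counts = await db.get_folder_workflow_counts_batch(
        folder_ids=[folder.folder_id],
        organization_id=organization_id,
    )
    print(counts.get(folder.folder_id, 0))
    # Soft delete detaches workflows (folder_id=None) unless
    # delete_workflows=True, which soft-deletes them as well.
    await db.soft_delete_folder(
        folder_id=folder.folder_id,
        organization_id=organization_id,
        delete_workflows=False,
    )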
async def create_workflow_run(
self,
workflow_permanent_id: str,

View File

@@ -39,6 +39,7 @@ CREDENTIAL_AZURE_VAULT_PARAMETER_PREFIX = "azcp"
CREDENTIAL_PARAMETER_PREFIX = "cp"
CREDENTIAL_PREFIX = "cred"
DEBUG_SESSION_PREFIX = "ds"
FOLDER_PREFIX = "fld"
BROWSER_PROFILE_PREFIX = "bp"
ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc"
TASK_V2_ID = "tsk_v2"
@@ -220,6 +221,11 @@ def generate_debug_session_id() -> str:
return f"{DEBUG_SESSION_PREFIX}_{int_id}"
def generate_folder_id() -> str:
int_id = generate_id()
return f"{FOLDER_PREFIX}_{int_id}"
def generate_organization_bitwarden_collection_id() -> str:
int_id = generate_id()
return f"{ORGANIZATION_BITWARDEN_COLLECTION_PREFIX}_{int_id}"

View File

@@ -32,6 +32,7 @@ from skyvern.forge.sdk.db.id import (
generate_credential_id,
generate_credential_parameter_id,
generate_debug_session_id,
generate_folder_id,
generate_onepassword_credential_parameter_id,
generate_org_id,
generate_organization_auth_token_id,
@@ -218,6 +219,28 @@ class ArtifactModel(Base):
)
class FolderModel(Base):
__tablename__ = "folders"
__table_args__ = (
Index("folder_organization_id_idx", "organization_id"),
Index("folder_organization_title_idx", "organization_id", "title"),
)
folder_id = Column(String, primary_key=True, default=generate_folder_id)
organization_id = Column(String, ForeignKey("organizations.organization_id", ondelete="CASCADE"), nullable=False)
title = Column(String, nullable=False)
description = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
modified_at = Column(
DateTime,
default=datetime.datetime.utcnow,
onupdate=datetime.datetime.utcnow,
nullable=False,
)
deleted_at = Column(DateTime, nullable=True)
class WorkflowModel(Base):
__tablename__ = "workflows"
__table_args__ = (
@@ -230,6 +253,7 @@ class WorkflowModel(Base):
Index("permanent_id_version_idx", "workflow_permanent_id", "version"),
Index("organization_id_title_idx", "organization_id", "title"),
Index("workflow_oid_status_idx", "organization_id", "status"),
Index("workflow_folder_id_idx", "folder_id"),
)
workflow_id = Column(String, primary_key=True, default=generate_workflow_id)
@@ -252,6 +276,8 @@ class WorkflowModel(Base):
cache_key = Column(String, nullable=True)
run_sequentially = Column(Boolean, nullable=True)
sequential_key = Column(String, nullable=True)
folder_id = Column(String, ForeignKey("folders.folder_id", ondelete="SET NULL"), nullable=True)
import_error = Column(String, nullable=True) # Error message if import failed
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
modified_at = Column(

View File

@@ -288,6 +288,8 @@ def convert_to_workflow(workflow_model: WorkflowModel, debug_enabled: bool = Fal
cache_key=workflow_model.cache_key,
run_sequentially=workflow_model.run_sequentially,
sequential_key=workflow_model.sequential_key,
folder_id=workflow_model.folder_id,
import_error=workflow_model.import_error,
)
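This view doesn't include the Alembic migration the new table and columns require. A sketch of what its upgrade() would cover, assuming Skyvern's usual Alembic setup (revision boilerplate omitted; the foreign-key constraint name is an assumption):

import sqlalchemy as sa
from alembic import op

def upgrade() -> None:
    op.create_table(
        "folders",
        sa.Column("folder_id", sa.String(), primary_key=True),
        sa.Column(
            "organization_id",
            sa.String(),
            sa.ForeignKey("organizations.organization_id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("title", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=False),
        sa.Column("modified_at", sa.DateTime(), nullable=False),
        sa.Column("deleted_at", sa.DateTime(), nullable=True),
    )
    op.create_index("folder_organization_id_idx", "folders", ["organization_id"])
    op.create_index("folder_organization_title_idx", "folders", ["organization_id", "title"])
    # New workflow columns and index from the model changes above.
    op.add_column("workflows", sa.Column("folder_id", sa.String(), nullable=True))
    op.add_column("workflows", sa.Column("import_error", sa.String(), nullable=True))
    op.create_foreign_key(
        "workflows_folder_id_fkey",  # assumed constraint name
        "workflows",
        "folders",
        ["folder_id"],
        ["folder_id"],
        ondelete="SET NULL",
    )
    op.create_index("workflow_folder_id_idx", "workflows", ["folder_id"])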

View File

@@ -1,9 +1,22 @@
import asyncio
from enum import Enum
from typing import Annotated, Any
import structlog
import yaml
from fastapi import BackgroundTasks, Depends, Header, HTTPException, Path, Query, Request, Response, UploadFile, status
from fastapi import (
BackgroundTasks,
Body,
Depends,
Header,
HTTPException,
Path,
Query,
Request,
Response,
UploadFile,
)
from fastapi import status as http_status
from fastapi.responses import ORJSONResponse
from skyvern import analytics
@@ -84,6 +97,7 @@ from skyvern.forge.sdk.workflow.models.workflow import (
WorkflowRunWithWorkflowResponse,
)
from skyvern.schemas.artifacts import EntityType, entity_type_to_param
from skyvern.schemas.folders import Folder, FolderCreate, FolderUpdate, UpdateWorkflowFolderRequest
from skyvern.schemas.runs import (
CUA_ENGINES,
BlockRunRequest,
@@ -410,7 +424,7 @@ async def get_run(
run_response = await run_service.get_run_response(run_id, organization_id=current_org.organization_id)
if not run_response:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Task run not found {run_id}",
)
return run_response
@@ -604,6 +618,22 @@ async def create_workflow_from_prompt(
return workflow.model_dump(by_alias=True)
async def _validate_file_size(file: UploadFile) -> UploadFile:
try:
file.file.seek(0, 2) # Move the pointer to the end of the file
size = file.file.tell() # Get the current position of the pointer, which represents the file size
file.file.seek(0) # Reset the pointer back to the beginning
except Exception as e:
raise HTTPException(status_code=500, detail="Could not determine file size.") from e
if size > app.SETTINGS_MANAGER.MAX_UPLOAD_FILE_SIZE:
raise HTTPException(
status_code=413,
detail=f"File size exceeds the maximum allowed size ({app.SETTINGS_MANAGER.MAX_UPLOAD_FILE_SIZE / 1024 / 1024} MB)",
)
return file
@legacy_base_router.post(
"/workflows/import-pdf",
response_model=dict[str, Any],
@@ -636,13 +666,97 @@ async def create_workflow_from_prompt(
include_in_schema=False,
)
async def import_workflow_from_pdf(
file: UploadFile,
background_tasks: BackgroundTasks,
file: UploadFile = Depends(_validate_file_size),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> dict[str, Any]:
"""Import a workflow from a PDF file containing Standard Operating Procedures."""
analytics.capture("skyvern-oss-workflow-import-pdf")
return await pdf_import_service.import_workflow_from_pdf(file, current_org)
# Read file and validate early (before creating import record)
if not file.filename or not file.filename.lower().endswith(".pdf"):
raise HTTPException(status_code=400, detail="Only PDF files are supported.")
try:
file_contents = await file.read()
file_name = file.filename
finally:
# Release underlying SpooledTemporaryFile ASAP
await file.close()
# Extract text in a worker thread so the 1-2 second parse doesn't block the event loop
try:
sop_text = await asyncio.to_thread(
pdf_import_service.extract_text_from_pdf,
file_contents,
file_name,
)
except HTTPException:
# Re-raise validation errors immediately
raise
# Validation passed! Create empty workflow v1 with status='importing'
empty_workflow = await app.DATABASE.create_workflow(
title=f"Importing {file_name}",
workflow_definition={"parameters": [], "blocks": []},
organization_id=current_org.organization_id,
status=WorkflowStatus.importing,
)
# Process PDF import in background (LLM call is the slow part)
async def process_pdf_import() -> None:
try:
# Create workflow from extracted text (LLM processing)
result = await pdf_import_service.create_workflow_from_sop_text(sop_text, current_org)
# Create v2 with real content
await app.WORKFLOW_SERVICE.create_workflow_from_request(
organization=current_org,
request=WorkflowCreateYAMLRequest.model_validate(result),
workflow_permanent_id=empty_workflow.workflow_permanent_id,
)
# Update v1 status to published (v1 won't show in list since v2 is latest version)
await app.DATABASE.update_workflow(
workflow_id=empty_workflow.workflow_id,
organization_id=current_org.organization_id,
status=WorkflowStatus.published,
)
LOG.info(
"Workflow import completed",
workflow_permanent_id=empty_workflow.workflow_permanent_id,
organization_id=current_org.organization_id,
)
except Exception as e:
# Log full error server-side for debugging
LOG.exception(
"Workflow import failed",
workflow_permanent_id=empty_workflow.workflow_permanent_id,
error=str(e),
organization_id=current_org.organization_id,
)
# Provide sanitized user-facing error message (don't expose internal details/PII)
sanitized_error = "Import failed. Please verify the PDF content and try again."
# Mark v1 as import_failed with sanitized error
await app.DATABASE.update_workflow(
workflow_id=empty_workflow.workflow_id,
organization_id=current_org.organization_id,
status=WorkflowStatus.import_failed,
import_error=sanitized_error,
)
background_tasks.add_task(process_pdf_import)
return {
"workflow_permanent_id": empty_workflow.workflow_permanent_id,
"status": "importing",
"file_name": file.filename,
"organization_id": current_org.organization_id,
"created_at": empty_workflow.created_at.isoformat(),
}
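From the client's side the endpoint now returns immediately and the import finishes in the background. A hedged polling sketch (the base URL, API-key header, and the GET-workflow endpoint used for polling are assumptions, not part of this diff):

import time

import requests

BASE = "https://api.skyvern.example"  # illustrative
HEADERS = {"x-api-key": "sk-..."}     # illustrative

def import_pdf_and_wait(path: str) -> dict:
    with open(path, "rb") as f:
        resp = requests.post(
            f"{BASE}/workflows/import-pdf",
            headers=HEADERS,
            files={"file": (path, f, "application/pdf")},
        )
    resp.raise_for_status()
    wpid = resp.json()["workflow_permanent_id"]
    # Poll until the background task flips the status away from "importing".
    while True:
        wf = requests.get(f"{BASE}/workflows/{wpid}", headers=HEADERS).json()
        if wf.get("status") != "importing":
            # "published" on success; "import_failed" plus import_error on failure.
            return wf
        time.sleep(2)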
@legacy_base_router.put(
@@ -817,6 +931,251 @@ async def delete_workflow(
await app.WORKFLOW_SERVICE.delete_workflow_by_permanent_id(workflow_id, current_org.organization_id)
################# Folder Endpoints #################
@legacy_base_router.post("/folders", response_model=Folder, tags=["agent"])
@legacy_base_router.post("/folders/", response_model=Folder, include_in_schema=False)
@base_router.post(
"/folders",
response_model=Folder,
tags=["Workflows"],
description="Create a new folder to organize workflows",
summary="Create folder",
responses={
200: {"description": "Successfully created folder"},
400: {"description": "Invalid request"},
},
)
@base_router.post("/folders/", response_model=Folder, include_in_schema=False)
async def create_folder(
data: FolderCreate,
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Folder:
analytics.capture("skyvern-oss-folder-create")
folder_model = await app.DATABASE.create_folder(
organization_id=current_org.organization_id,
title=data.title,
description=data.description,
)
workflow_count = await app.DATABASE.get_folder_workflow_count(
folder_id=folder_model.folder_id,
organization_id=current_org.organization_id,
)
return Folder(
folder_id=folder_model.folder_id,
organization_id=folder_model.organization_id,
title=folder_model.title,
description=folder_model.description,
workflow_count=workflow_count,
created_at=folder_model.created_at,
modified_at=folder_model.modified_at,
)
@legacy_base_router.get("/folders/{folder_id}", response_model=Folder, tags=["agent"])
@legacy_base_router.get("/folders/{folder_id}/", response_model=Folder, include_in_schema=False)
@base_router.get(
"/folders/{folder_id}",
response_model=Folder,
tags=["Workflows"],
description="Get a specific folder by ID",
summary="Get folder",
responses={
200: {"description": "Successfully retrieved folder"},
404: {"description": "Folder not found"},
},
)
@base_router.get("/folders/{folder_id}/", response_model=Folder, include_in_schema=False)
async def get_folder(
folder_id: str = Path(..., description="Folder ID", examples=["fld_123"]),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Folder:
folder = await app.DATABASE.get_folder(
folder_id=folder_id,
organization_id=current_org.organization_id,
)
if not folder:
raise HTTPException(status_code=http_status.HTTP_404_NOT_FOUND, detail=f"Folder {folder_id} not found")
workflow_count = await app.DATABASE.get_folder_workflow_count(
folder_id=folder.folder_id,
organization_id=current_org.organization_id,
)
return Folder(
folder_id=folder.folder_id,
organization_id=folder.organization_id,
title=folder.title,
description=folder.description,
workflow_count=workflow_count,
created_at=folder.created_at,
modified_at=folder.modified_at,
)
@legacy_base_router.get("/folders", response_model=list[Folder], tags=["agent"])
@legacy_base_router.get("/folders/", response_model=list[Folder], include_in_schema=False)
@base_router.get(
"/folders",
response_model=list[Folder],
tags=["Workflows"],
description="Get all folders for the organization",
summary="Get folders",
responses={
200: {"description": "Successfully retrieved folders"},
},
)
@base_router.get("/folders/", response_model=list[Folder], include_in_schema=False)
async def get_folders(
page: int = Query(1, ge=1, description="Page number"),
page_size: int = Query(100, ge=1, le=500, description="Number of folders per page"),
search: str | None = Query(None, description="Search folders by title or description"),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> list[Folder]:
folders = await app.DATABASE.get_folders(
organization_id=current_org.organization_id,
page=page,
page_size=page_size,
search_query=search,
)
# Get workflow counts for all folders in a single query
if folders:
folder_ids = [folder.folder_id for folder in folders]
workflow_counts = await app.DATABASE.get_folder_workflow_counts_batch(
folder_ids=folder_ids,
organization_id=current_org.organization_id,
)
else:
workflow_counts = {}
# Build result with workflow counts
result = []
for folder in folders:
result.append(
Folder(
folder_id=folder.folder_id,
organization_id=folder.organization_id,
title=folder.title,
description=folder.description,
workflow_count=workflow_counts.get(folder.folder_id, 0),
created_at=folder.created_at,
modified_at=folder.modified_at,
)
)
return result
@legacy_base_router.put("/folders/{folder_id}", response_model=Folder, tags=["agent"])
@legacy_base_router.put("/folders/{folder_id}/", response_model=Folder, include_in_schema=False)
@base_router.put(
"/folders/{folder_id}",
response_model=Folder,
tags=["Workflows"],
description="Update a folder's title or description",
summary="Update folder",
responses={
200: {"description": "Successfully updated folder"},
404: {"description": "Folder not found"},
},
)
@base_router.put("/folders/{folder_id}/", response_model=Folder, include_in_schema=False)
async def update_folder(
folder_id: str = Path(..., description="Folder ID", examples=["fld_123"]),
data: FolderUpdate = Body(...),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Folder:
folder = await app.DATABASE.update_folder(
folder_id=folder_id,
organization_id=current_org.organization_id,
title=data.title,
description=data.description,
)
if not folder:
raise HTTPException(status_code=http_status.HTTP_404_NOT_FOUND, detail=f"Folder {folder_id} not found")
workflow_count = await app.DATABASE.get_folder_workflow_count(
folder_id=folder.folder_id,
organization_id=current_org.organization_id,
)
return Folder(
folder_id=folder.folder_id,
organization_id=folder.organization_id,
title=folder.title,
description=folder.description,
workflow_count=workflow_count,
created_at=folder.created_at,
modified_at=folder.modified_at,
)
@legacy_base_router.delete("/folders/{folder_id}", tags=["agent"])
@legacy_base_router.delete("/folders/{folder_id}/", include_in_schema=False)
@base_router.delete(
"/folders/{folder_id}",
tags=["Workflows"],
description="Delete a folder. Optionally delete all workflows in the folder.",
summary="Delete folder",
responses={
200: {"description": "Successfully deleted folder"},
404: {"description": "Folder not found"},
},
)
@base_router.delete("/folders/{folder_id}/", include_in_schema=False)
async def delete_folder(
folder_id: str = Path(..., description="Folder ID", examples=["fld_123"]),
delete_workflows: bool = Query(False, description="If true, also delete all workflows in this folder"),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> dict:
analytics.capture("skyvern-oss-folder-delete")
success = await app.DATABASE.soft_delete_folder(
folder_id=folder_id,
organization_id=current_org.organization_id,
delete_workflows=delete_workflows,
)
if not success:
raise HTTPException(status_code=http_status.HTTP_404_NOT_FOUND, detail=f"Folder {folder_id} not found")
return {"status": "deleted", "folder_id": folder_id, "workflows_deleted": delete_workflows}
@legacy_base_router.put("/workflows/{workflow_permanent_id}/folder", response_model=Workflow, tags=["agent"])
@legacy_base_router.put("/workflows/{workflow_permanent_id}/folder/", response_model=Workflow, include_in_schema=False)
@base_router.put(
"/workflows/{workflow_permanent_id}/folder",
response_model=Workflow,
tags=["Workflows"],
description="Update a workflow's folder assignment for the latest version",
summary="Update workflow folder",
responses={
200: {"description": "Successfully updated workflow folder"},
404: {"description": "Workflow not found"},
400: {"description": "Folder not found"},
},
)
@base_router.put("/workflows/{workflow_permanent_id}/folder/", response_model=Workflow, include_in_schema=False)
async def update_workflow_folder(
workflow_permanent_id: str = Path(..., description="Workflow permanent ID", examples=["wpid_123"]),
data: UpdateWorkflowFolderRequest = Body(...),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Workflow:
try:
workflow = await app.DATABASE.update_workflow_folder(
workflow_permanent_id=workflow_permanent_id,
organization_id=current_org.organization_id,
folder_id=data.folder_id,
)
if not workflow:
raise HTTPException(
status_code=http_status.HTTP_404_NOT_FOUND, detail=f"Workflow {workflow_permanent_id} not found"
)
return workflow
except ValueError as e:
raise HTTPException(status_code=http_status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
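A quick tour of the new endpoints (base URL, API key, and the workflow permanent ID are illustrative):

import requests

BASE = "https://api.skyvern.example"
HEADERS = {"x-api-key": "sk-..."}

# Create a folder.
folder = requests.post(
    f"{BASE}/folders",
    headers=HEADERS,
    json={"title": "Billing", "description": "Invoice workflows"},
).json()

# Assign the latest version of a workflow to it.
requests.put(
    f"{BASE}/workflows/wpid_123/folder",
    headers=HEADERS,
    json={"folder_id": folder["folder_id"]},
)

# List folders, searching title and description.
folders = requests.get(
    f"{BASE}/folders",
    headers=HEADERS,
    params={"page": 1, "page_size": 100, "search": "bill"},
).json()

# Delete the folder but keep (detach) its workflows.
requests.delete(
    f"{BASE}/folders/{folder['folder_id']}",
    headers=HEADERS,
    params={"delete_workflows": "false"},
)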
@legacy_base_router.post(
"/utilities/curl-to-http",
tags=["Utilities"],
@@ -896,7 +1255,7 @@ async def get_artifact(
)
if not artifact:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Artifact not found {artifact_id}",
)
if settings.ENV != "local" or settings.GENERATE_PRESIGNED_URLS:
@@ -1027,7 +1386,7 @@ async def get_run_timeline(
run_response = await run_service.get_run_response(run_id, organization_id=current_org.organization_id)
if not run_response:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Run not found {run_id}",
)
@@ -1040,13 +1399,13 @@ async def get_run_timeline(
task_v2 = await app.DATABASE.get_task_v2(task_v2_id=run_id, organization_id=current_org.organization_id)
if not task_v2:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Task v2 not found {run_id}",
)
if not task_v2.workflow_run_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
status_code=http_status.HTTP_400_BAD_REQUEST,
detail=f"Task v2 {run_id} has no associated workflow run",
)
@@ -1054,7 +1413,7 @@ async def get_run_timeline(
# Timeline not available for other run types
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
status_code=http_status.HTTP_400_BAD_REQUEST,
detail=f"Timeline not available for run type {run_response.run_type}",
)
@@ -1149,7 +1508,7 @@ async def webhook(
payload=payload,
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
status_code=http_status.HTTP_400_BAD_REQUEST,
detail="Missing webhook signature or timestamp",
)
@@ -1270,7 +1629,7 @@ async def cancel_task(
task_obj = await app.DATABASE.get_task(task_id, organization_id=current_org.organization_id)
if not task_obj:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Task not found {task_id}",
)
task = await app.agent.update_task(task_obj, status=TaskStatus.canceled)
@@ -1288,7 +1647,7 @@ async def _cancel_workflow_run(workflow_run_id: str, organization_id: str, x_api
if not workflow_run:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Workflow run not found {workflow_run_id}",
)
@@ -1324,7 +1683,7 @@ async def _continue_workflow_run(workflow_run_id: str, organization_id: str) ->
if not workflow_run:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Workflow run not found {workflow_run_id}",
)
@@ -1398,7 +1757,7 @@ async def retry_webhook(
task_obj = await app.DATABASE.get_task(task_id, organization_id=current_org.organization_id)
if not task_obj:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Task not found {task_id}",
)
@@ -1452,7 +1811,7 @@ async def get_tasks(
analytics.capture("skyvern-oss-agent-tasks-get")
if only_standalone_tasks and workflow_run_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
status_code=http_status.HTTP_400_BAD_REQUEST,
detail="only_standalone_tasks and workflow_run_id cannot be used together",
)
tasks = await app.DATABASE.get_tasks(
@@ -1560,7 +1919,7 @@ async def get_artifacts(
if entity_type not in entity_type_to_param:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
status_code=http_status.HTTP_400_BAD_REQUEST,
detail=f"Invalid entity_type: {entity_type}",
)
@@ -1828,7 +2187,7 @@ async def get_workflow_and_run_from_workflow_run_id(
if not workflow:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
status_code=http_status.HTTP_404_NOT_FOUND,
detail=f"Workflow run not found {workflow_run_id}",
)
@@ -1926,9 +2285,11 @@ async def get_workflows(
only_workflows: bool = Query(False),
search_key: str | None = Query(
None,
description="Unified search across workflow title and parameter metadata (key, description, default_value).",
description="Unified search across workflow title, folder name, and parameter metadata (key, description, default_value).",
),
title: str = Query("", deprecated=True, description="Deprecated: use search_key instead."),
folder_id: str | None = Query(None, description="Filter workflows by folder ID"),
status: Annotated[list[WorkflowStatus] | None, Query()] = None,
current_org: Organization = Depends(org_auth_service.get_current_org),
template: bool = Query(False),
) -> list[Workflow]:
@@ -1936,8 +2297,8 @@ async def get_workflows(
Get all workflows with the latest version for the organization.
Search semantics:
- If `search_key` is provided, its value is used as a unified search term for both
`workflows.title` and workflow parameter metadata (key, description, and default_value for
- If `search_key` is provided, its value is used as a unified search term for
`workflows.title`, `folders.title`, and workflow parameter metadata (key, description, and default_value for
`WorkflowParameterModel`).
- Falls back to deprecated `title` (title-only search) if `search_key` is not provided.
- Parameter metadata search excludes soft-deleted parameter rows across all parameter tables.
@@ -1947,6 +2308,9 @@ async def get_workflows(
# Determine the effective search term: prioritize search_key, fallback to title
effective_search = search_key or (title if title else None)
# Default to published and draft if no status filter provided
effective_statuses = status if status else [WorkflowStatus.published, WorkflowStatus.draft]
if template:
global_workflows_permanent_ids = await app.STORAGE.retrieve_global_workflows()
if not global_workflows_permanent_ids:
@@ -1956,13 +2320,13 @@ async def get_workflows(
page=page,
page_size=page_size,
search_key=effective_search or "",
statuses=[WorkflowStatus.published, WorkflowStatus.draft],
statuses=effective_statuses,
)
return workflows
if only_saved_tasks and only_workflows:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
status_code=http_status.HTTP_400_BAD_REQUEST,
detail="only_saved_tasks and only_workflows cannot be used together",
)
@@ -1973,7 +2337,8 @@ async def get_workflows(
only_saved_tasks=only_saved_tasks,
only_workflows=only_workflows,
search_key=effective_search,
statuses=[WorkflowStatus.published, WorkflowStatus.draft],
folder_id=folder_id,
statuses=effective_statuses,
)
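The folder filter composes with the existing query parameters; a minimal sketch (illustrative base URL, key, and folder ID):

import requests

workflows = requests.get(
    "https://api.skyvern.example/workflows",
    headers={"x-api-key": "sk-..."},
    params={
        "folder_id": "fld_123",
        # Repeated status params include drafts and failed imports too.
        "status": ["published", "draft", "import_failed"],
        "search_key": "invoice",
    },
).json()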
@@ -2182,22 +2547,6 @@ async def get_api_keys(
return GetOrganizationAPIKeysResponse(api_keys=api_keys)
async def _validate_file_size(file: UploadFile) -> UploadFile:
try:
file.file.seek(0, 2) # Move the pointer to the end of the file
size = file.file.tell() # Get the current position of the pointer, which represents the file size
file.file.seek(0) # Reset the pointer back to the beginning
except Exception as e:
raise HTTPException(status_code=500, detail="Could not determine file size.") from e
if size > app.SETTINGS_MANAGER.MAX_UPLOAD_FILE_SIZE:
raise HTTPException(
status_code=413,
detail=f"File size exceeds the maximum allowed size ({app.SETTINGS_MANAGER.MAX_UPLOAD_FILE_SIZE / 1024 / 1024} MB)",
)
return file
@legacy_base_router.post(
"/upload_file",
tags=["server"],

View File

@@ -83,6 +83,8 @@ class Workflow(BaseModel):
cache_key: str | None = None
run_sequentially: bool | None = None
sequential_key: str | None = None
folder_id: str | None = None
import_error: str | None = None
created_at: datetime
modified_at: datetime

View File

@@ -1098,6 +1098,7 @@ class WorkflowService:
ai_fallback: bool | None = None,
run_sequentially: bool = False,
sequential_key: str | None = None,
folder_id: str | None = None,
) -> Workflow:
return await app.DATABASE.create_workflow(
title=title,
@@ -1121,6 +1122,7 @@ class WorkflowService:
ai_fallback=False if ai_fallback is None else ai_fallback,
run_sequentially=run_sequentially,
sequential_key=sequential_key,
folder_id=folder_id,
)
async def create_workflow_from_prompt(
@@ -1378,13 +1380,15 @@ class WorkflowService:
only_saved_tasks: bool = False,
only_workflows: bool = False,
search_key: str | None = None,
folder_id: str | None = None,
statuses: list[WorkflowStatus] | None = None,
) -> list[Workflow]:
"""
Get all workflows with the latest version for the organization.
Args:
search_key: Unified search term for title and parameter metadata (replaces title/parameter).
search_key: Unified search term for title, folder name, and parameter metadata.
folder_id: Filter workflows by folder ID.
"""
return await app.DATABASE.get_workflows_by_organization_id(
organization_id=organization_id,
@@ -1393,6 +1397,7 @@ class WorkflowService:
only_saved_tasks=only_saved_tasks,
only_workflows=only_workflows,
search_key=search_key,
folder_id=folder_id,
statuses=statuses,
)
@@ -2723,6 +2728,7 @@ class WorkflowService:
ai_fallback=request.ai_fallback,
run_sequentially=request.run_sequentially,
sequential_key=request.sequential_key,
folder_id=existing_latest_workflow.folder_id,
)
else:
# NOTE: it's only potential, as it may be immediately deleted!