Add SOP-to-blocks endpoint for workflow editor - backend (#4556)
This commit is contained in:
@@ -1,5 +1,22 @@
|
|||||||
You are an AI assistant that converts Standard Operating Procedures (SOP) from text into a Skyvern workflow definition in JSON format.
|
You are an AI assistant that converts Standard Operating Procedures (SOP) from text into a Skyvern workflow definition in JSON format.
|
||||||
|
|
||||||
|
FIRST: Evaluate if this content contains procedural instructions.
|
||||||
|
|
||||||
|
A valid SOP MUST contain:
|
||||||
|
- Step-by-step instructions telling someone what to DO
|
||||||
|
- Action verbs like: go to, click, navigate, enter, fill, submit, download, extract, etc.
|
||||||
|
- Sequential steps for completing a task
|
||||||
|
|
||||||
|
Note: A document may include reference materials (like sample invoices, screenshots, or examples) alongside instructions - this is still a valid SOP as long as it contains procedural steps.
|
||||||
|
|
||||||
|
If the content has NO procedural instructions (just data, information, or documents without steps), return ONLY this JSON:
|
||||||
|
{
|
||||||
|
"error": "not_sop",
|
||||||
|
"reason": "This document does not contain procedural instructions or steps to automate."
|
||||||
|
}
|
||||||
|
|
||||||
|
If the content DOES contain procedural instructions, proceed with the conversion below.
|
||||||
|
|
||||||
REQUIRED OUTPUT FORMAT:
|
REQUIRED OUTPUT FORMAT:
|
||||||
Return a JSON object with this structure:
|
Return a JSON object with this structure:
|
||||||
{
|
{
|
||||||
@@ -68,7 +85,7 @@ CRITICAL INSTRUCTIONS - READ CAREFULLY:
|
|||||||
9. **AVOID VALIDATION BLOCKS**: Use "extraction" blocks for data extraction. Only use "validation" if explicitly validating previous extracted data, and always include complete_criterion.
|
9. **AVOID VALIDATION BLOCKS**: Use "extraction" blocks for data extraction. Only use "validation" if explicitly validating previous extracted data, and always include complete_criterion.
|
||||||
10. Set continue_on_failure to false for critical steps, true for optional ones
|
10. Set continue_on_failure to false for critical steps, true for optional ones
|
||||||
11. Set engine to "skyvern-1.0" for all blocks that need it
|
11. Set engine to "skyvern-1.0" for all blocks that need it
|
||||||
12. Use clear, descriptive labels that match the SOP terminology
|
12. Use SHORT descriptive labels (1-5 words, snake_case): e.g., "login", "extract_date_data", "submit_google_login_form". Avoid long labels.
|
||||||
|
|
||||||
EXAMPLES OF THOROUGHNESS:
|
EXAMPLES OF THOROUGHNESS:
|
||||||
- If SOP says "Navigate to page X, then click button Y, then fill form Z" → Create 3 separate blocks
|
- If SOP says "Navigate to page X, then click button Y, then fill form Z" → Create 3 separate blocks
|
||||||
|
|||||||
@@ -664,6 +664,76 @@ async def _validate_file_size(file: UploadFile) -> UploadFile:
|
|||||||
return file
|
return file
|
||||||
|
|
||||||
|
|
||||||
|
@legacy_base_router.post(
    "/workflows/sop-to-blocks",
    response_model=dict[str, Any],
    include_in_schema=False,
)
@legacy_base_router.post(
    "/workflows/sop-to-blocks/",
    response_model=dict[str, Any],
    include_in_schema=False,
)
async def convert_sop_to_blocks(
    file: UploadFile = Depends(_validate_file_size),
    current_org: Organization = Depends(org_auth_service.get_current_org),
) -> dict[str, Any]:
    """Convert a PDF SOP to workflow blocks without creating a workflow.

    The uploaded PDF is read, its text is extracted, and the text is handed to
    ``pdf_import_service.create_workflow_from_sop_text``. The resulting
    workflow definition is reshaped so that each block carries a
    ``parameters`` list (``[{"key": ...}, ...]``) instead of ``parameter_keys``.

    Args:
        file: The uploaded file, already passed through ``_validate_file_size``.
        current_org: The organization resolved by the auth dependency.

    Returns:
        A dict with two keys: ``"blocks"`` (list of block dicts in frontend
        format) and ``"parameters"`` (the workflow-level parameters list).

    Raises:
        HTTPException: 400 when the upload has no filename or the filename
            does not end with ``.pdf``; 422 when the conversion raises an
            unexpected error. ``HTTPException``s raised by the conversion
            itself are propagated unchanged.
    """
    analytics.capture(
        "skyvern-oss-workflow-sop-to-blocks",
        data={"organization_id": current_org.organization_id},
    )

    # Validate and read inside one try/finally so the upload is closed on the
    # rejection path as well as on the success path.
    try:
        file_name = file.filename
        if not file_name or not file_name.lower().endswith(".pdf"):
            raise HTTPException(status_code=400, detail="Only PDF files are supported.")
        file_contents = await file.read()
    finally:
        await file.close()

    # Extract text from PDF (run in a worker thread via asyncio.to_thread)
    sop_text = await asyncio.to_thread(
        pdf_import_service.extract_text_from_pdf,
        file_contents,
        file_name,
    )

    # Convert to workflow definition via LLM
    try:
        result = await pdf_import_service.create_workflow_from_sop_text(sop_text, current_org)
    except HTTPException:
        # Already a well-formed API error; pass it through untouched.
        raise
    except Exception as e:
        LOG.exception(
            "Failed to convert SOP to blocks",
            organization_id=current_org.organization_id,
            filename=file_name,
        )
        raise HTTPException(
            status_code=422,
            detail="Failed to convert SOP to workflow blocks. Please verify the PDF content and try again.",
        ) from e

    # ``or`` fallbacks cover keys that are present but explicitly null, which
    # a plain ``.get(key, default)`` would let through as None.
    workflow_def = result.get("workflow_definition") or {}
    raw_blocks = workflow_def.get("blocks") or []

    # Transform blocks: convert parameter_keys (backend format) to parameters (frontend format)
    # This is done here rather than in _sanitize_workflow_json because the import-pdf endpoint
    # needs the backend format for WorkflowCreateYAMLRequest validation
    # Create shallow copies to avoid mutating shared data structures.
    # Entries that are not dicts cannot be converted and are left out.
    blocks = [dict(block) for block in raw_blocks if isinstance(block, dict)]
    for block in blocks:
        parameter_keys = block.pop("parameter_keys", None) or []
        block["parameters"] = [{"key": key} for key in parameter_keys]

    return {
        "blocks": blocks,
        "parameters": workflow_def.get("parameters") or [],
    }
|
||||||
|
|
||||||
|
|
||||||
@legacy_base_router.post(
|
@legacy_base_router.post(
|
||||||
"/workflows/import-pdf",
|
"/workflows/import-pdf",
|
||||||
response_model=dict[str, Any],
|
response_model=dict[str, Any],
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ class PDFImportService:
|
|||||||
def _sanitize_workflow_json(raw: dict[str, Any]) -> dict[str, Any]:
|
def _sanitize_workflow_json(raw: dict[str, Any]) -> dict[str, Any]:
|
||||||
"""Clean LLM JSON to match Skyvern schema conventions and avoid Jinja errors.
|
"""Clean LLM JSON to match Skyvern schema conventions and avoid Jinja errors.
|
||||||
|
|
||||||
|
- Replace whitespace in block labels with underscores
|
||||||
- Replace Jinja refs like {{workflow.foo}} or {{parameters.foo}} with {{foo}}
|
- Replace Jinja refs like {{workflow.foo}} or {{parameters.foo}} with {{foo}}
|
||||||
- Auto-populate block.parameter_keys with any referenced parameter keys
|
- Auto-populate block.parameter_keys with any referenced parameter keys
|
||||||
- Ensure all block labels are unique by appending indices to duplicates
|
- Ensure all block labels are unique by appending indices to duplicates
|
||||||
@@ -47,7 +48,23 @@ class PDFImportService:
|
|||||||
|
|
||||||
blocks = workflow_def.get("blocks", []) or []
|
blocks = workflow_def.get("blocks", []) or []
|
||||||
|
|
||||||
# First pass: deduplicate block labels
|
# First pass: sanitize block labels (replace whitespace with underscores)
|
||||||
|
for blk in blocks:
|
||||||
|
if not isinstance(blk, dict):
|
||||||
|
continue
|
||||||
|
label = blk.get("label", "")
|
||||||
|
if label:
|
||||||
|
# Replace any whitespace with underscores (same as frontend behavior)
|
||||||
|
sanitized_label = re.sub(r"\s+", "_", label)
|
||||||
|
if sanitized_label != label:
|
||||||
|
LOG.info(
|
||||||
|
"Sanitizing block label",
|
||||||
|
original_label=label,
|
||||||
|
sanitized_label=sanitized_label,
|
||||||
|
)
|
||||||
|
blk["label"] = sanitized_label
|
||||||
|
|
||||||
|
# Second pass: deduplicate block labels
|
||||||
seen_labels: dict[str, int] = {}
|
seen_labels: dict[str, int] = {}
|
||||||
deduplicated_count = 0
|
deduplicated_count = 0
|
||||||
for blk in blocks:
|
for blk in blocks:
|
||||||
@@ -130,6 +147,9 @@ class PDFImportService:
|
|||||||
if blk.get("url") is None:
|
if blk.get("url") is None:
|
||||||
blk["url"] = ""
|
blk["url"] = ""
|
||||||
|
|
||||||
|
# Note: parameter_keys is kept in backend format for WorkflowCreateYAMLRequest validation
|
||||||
|
# The sop-to-blocks endpoint transforms to frontend format separately
|
||||||
|
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
def extract_text_from_pdf(self, file_contents: bytes, file_name: str) -> str:
|
def extract_text_from_pdf(self, file_contents: bytes, file_name: str) -> str:
|
||||||
@@ -212,6 +232,20 @@ class PDFImportService:
|
|||||||
)
|
)
|
||||||
raise HTTPException(status_code=422, detail="LLM returned invalid response format - expected JSON object")
|
raise HTTPException(status_code=422, detail="LLM returned invalid response format - expected JSON object")
|
||||||
|
|
||||||
|
# Check if LLM detected non-SOP content
|
||||||
|
if response.get("error") == "not_sop":
|
||||||
|
LOG.info(
|
||||||
|
"LLM detected non-SOP content",
|
||||||
|
reason=response.get("reason"),
|
||||||
|
organization_id=organization.organization_id,
|
||||||
|
)
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=422,
|
||||||
|
detail=response.get(
|
||||||
|
"reason", "The uploaded PDF does not appear to contain a Standard Operating Procedure."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
# Validate that it has the required structure
|
# Validate that it has the required structure
|
||||||
if "workflow_definition" not in response:
|
if "workflow_definition" not in response:
|
||||||
LOG.error(
|
LOG.error(
|
||||||
|
|||||||
Reference in New Issue
Block a user