introduce complete verification (#1201)
This commit is contained in:
@@ -49,6 +49,7 @@ from skyvern.webeye.actions.actions import (
|
|||||||
Action,
|
Action,
|
||||||
ActionType,
|
ActionType,
|
||||||
CompleteAction,
|
CompleteAction,
|
||||||
|
CompleteVerifyResult,
|
||||||
DecisiveAction,
|
DecisiveAction,
|
||||||
UserDefinedError,
|
UserDefinedError,
|
||||||
WebAction,
|
WebAction,
|
||||||
@@ -923,57 +924,59 @@ class ForgeAgent:
|
|||||||
)
|
)
|
||||||
return failed_step, detailed_agent_step_output.get_clean_detailed_output()
|
return failed_step, detailed_agent_step_output.get_clean_detailed_output()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def complete_verify(page: Page, scraped_page: ScrapedPage, task: Task, step: Step) -> CompleteVerifyResult:
|
||||||
|
LOG.info(
|
||||||
|
"Checking if user goal is achieved after re-scraping the page",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
workflow_run_id=task.workflow_run_id,
|
||||||
|
)
|
||||||
|
scraped_page_refreshed = await scraped_page.refresh()
|
||||||
|
|
||||||
|
# TODO: currently, just using the check user goal for complete verification
|
||||||
|
# maybe need a designed complete criterion in the future
|
||||||
|
verification_prompt = prompt_engine.load_prompt(
|
||||||
|
"check-user-goal",
|
||||||
|
navigation_goal=task.navigation_goal,
|
||||||
|
navigation_payload=task.navigation_payload,
|
||||||
|
elements=scraped_page_refreshed.build_element_tree(ElementTreeFormat.HTML),
|
||||||
|
)
|
||||||
|
|
||||||
|
# this prompt is critical to our agent so let's use the primary LLM API handler
|
||||||
|
verification_result = await app.LLM_API_HANDLER(
|
||||||
|
prompt=verification_prompt, step=step, screenshots=scraped_page_refreshed.screenshots
|
||||||
|
)
|
||||||
|
return CompleteVerifyResult.model_validate(verification_result)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def check_user_goal_complete(
|
async def check_user_goal_complete(
|
||||||
page: Page, scraped_page: ScrapedPage, task: Task, step: Step
|
page: Page, scraped_page: ScrapedPage, task: Task, step: Step
|
||||||
) -> CompleteAction | None:
|
) -> CompleteAction | None:
|
||||||
try:
|
try:
|
||||||
LOG.info(
|
verification_result = await app.agent.complete_verify(
|
||||||
"Checking if user goal is achieved after re-scraping the page without screenshots",
|
page=page,
|
||||||
task_id=task.task_id,
|
scraped_page=scraped_page,
|
||||||
step_id=step.step_id,
|
task=task,
|
||||||
workflow_run_id=task.workflow_run_id,
|
step=step,
|
||||||
)
|
|
||||||
scraped_page_refreshed = await scraped_page.refresh()
|
|
||||||
|
|
||||||
verification_prompt = prompt_engine.load_prompt(
|
|
||||||
"check-user-goal",
|
|
||||||
navigation_goal=task.navigation_goal,
|
|
||||||
navigation_payload=task.navigation_payload,
|
|
||||||
elements=scraped_page_refreshed.build_element_tree(ElementTreeFormat.HTML),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# this prompt is critical to our agent so let's use the primary LLM API handler
|
|
||||||
verification_response = await app.LLM_API_HANDLER(
|
|
||||||
prompt=verification_prompt, step=step, screenshots=scraped_page_refreshed.screenshots
|
|
||||||
)
|
|
||||||
if "user_goal_achieved" not in verification_response or "thoughts" not in verification_response:
|
|
||||||
LOG.error(
|
|
||||||
"Invalid LLM response for user goal success verification, skipping verification",
|
|
||||||
verification_response=verification_response,
|
|
||||||
task_id=task.task_id,
|
|
||||||
step_id=step.step_id,
|
|
||||||
workflow_run_id=task.workflow_run_id,
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
|
|
||||||
user_goal_achieved: bool = verification_response["user_goal_achieved"]
|
|
||||||
# We don't want to return a complete action if the user goal is not achieved since we're checking at every step
|
# We don't want to return a complete action if the user goal is not achieved since we're checking at every step
|
||||||
if not user_goal_achieved:
|
if not verification_result.user_goal_achieved:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return CompleteAction(
|
return CompleteAction(
|
||||||
reasoning=verification_response["thoughts"],
|
reasoning=verification_result.thoughts,
|
||||||
data_extraction_goal=task.data_extraction_goal,
|
data_extraction_goal=task.data_extraction_goal,
|
||||||
|
verified=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.error(
|
LOG.exception(
|
||||||
"LLM verification failed for complete action, skipping LLM verification",
|
"Failed to check user goal complete, skipping",
|
||||||
task_id=task.task_id,
|
task_id=task.task_id,
|
||||||
step_id=step.step_id,
|
step_id=step.step_id,
|
||||||
workflow_run_id=task.workflow_run_id,
|
workflow_run_id=task.workflow_run_id,
|
||||||
exc_info=True,
|
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
@@ -7,12 +7,19 @@ Make sure to ONLY return the JSON object in this format with no additional text
|
|||||||
"thoughts": str, // Think step by step. What information makes you believe whether user goal has completed or not. Use information you see on the site to explain.
|
"thoughts": str, // Think step by step. What information makes you believe whether user goal has completed or not. Use information you see on the site to explain.
|
||||||
"user_goal_achieved": bool // True if the user goal has been completed, false otherwise.
|
"user_goal_achieved": bool // True if the user goal has been completed, false otherwise.
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
Elements on the page:
|
Elements on the page:
|
||||||
|
```
|
||||||
{{ elements }}
|
{{ elements }}
|
||||||
|
```
|
||||||
|
|
||||||
User Goal:
|
User Goal:
|
||||||
|
```
|
||||||
{{ navigation_goal }}
|
{{ navigation_goal }}
|
||||||
|
```
|
||||||
|
|
||||||
User Details:
|
User Details:
|
||||||
|
```
|
||||||
{{ navigation_payload }}
|
{{ navigation_payload }}
|
||||||
|
```
|
||||||
@@ -65,6 +65,15 @@ class SelectOption(BaseModel):
|
|||||||
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
|
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
|
||||||
|
|
||||||
|
|
||||||
|
class CompleteVerifyResult(BaseModel):
|
||||||
|
user_goal_achieved: bool
|
||||||
|
thoughts: str
|
||||||
|
page_info: str | None = None
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"CompleteVerifyResponse(thoughts={self.thoughts}, user_goal_achieved={self.user_goal_achieved}, page_info={self.page_info})"
|
||||||
|
|
||||||
|
|
||||||
class InputOrSelectContext(BaseModel):
|
class InputOrSelectContext(BaseModel):
|
||||||
field: str | None = None
|
field: str | None = None
|
||||||
is_required: bool | None = None
|
is_required: bool | None = None
|
||||||
@@ -226,6 +235,7 @@ class TerminateAction(DecisiveAction):
|
|||||||
|
|
||||||
class CompleteAction(DecisiveAction):
|
class CompleteAction(DecisiveAction):
|
||||||
action_type: ActionType = ActionType.COMPLETE
|
action_type: ActionType = ActionType.COMPLETE
|
||||||
|
verified: bool = False
|
||||||
data_extraction_goal: str | None = None
|
data_extraction_goal: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1032,9 +1032,14 @@ async def handle_complete_action(
|
|||||||
) -> list[ActionResult]:
|
) -> list[ActionResult]:
|
||||||
# If this action has a source_action_id, then we need to verify that the goal is actually completed.
|
# If this action has a source_action_id, then we need to verify that the goal is actually completed.
|
||||||
if action.source_action_id:
|
if action.source_action_id:
|
||||||
LOG.info("CompleteAction has source_action_id, checking if goal is completed")
|
LOG.info(
|
||||||
complete_action = await app.agent.check_user_goal_complete(page, scraped_page, task, step)
|
"CompleteAction has source_action_id, checking if goal is completed",
|
||||||
if complete_action is None:
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
workflow_run_id=task.workflow_run_id,
|
||||||
|
)
|
||||||
|
verified_complete_action = await app.agent.check_user_goal_complete(page, scraped_page, task, step)
|
||||||
|
if verified_complete_action is None:
|
||||||
return [
|
return [
|
||||||
ActionFailure(
|
ActionFailure(
|
||||||
exception=IllegitComplete(
|
exception=IllegitComplete(
|
||||||
@@ -1044,6 +1049,36 @@ async def handle_complete_action(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
action.verified = True
|
||||||
|
|
||||||
|
if not action.verified:
|
||||||
|
LOG.info(
|
||||||
|
"CompleteAction hasn't been verified, going to verify the user goal",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
workflow_run_id=task.workflow_run_id,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
verification_result = await app.agent.complete_verify(page, scraped_page, task, step)
|
||||||
|
except Exception as e:
|
||||||
|
LOG.exception(
|
||||||
|
"Failed to verify the complete action",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
workflow_run_id=task.workflow_run_id,
|
||||||
|
)
|
||||||
|
return [ActionFailure(exception=e)]
|
||||||
|
|
||||||
|
if not verification_result.user_goal_achieved:
|
||||||
|
return [ActionFailure(exception=IllegitComplete(data={"error": verification_result.thoughts}))]
|
||||||
|
|
||||||
|
LOG.info(
|
||||||
|
"CompleteAction has been verified successfully",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
workflow_run_id=task.workflow_run_id,
|
||||||
|
)
|
||||||
|
action.verified = True
|
||||||
|
|
||||||
extracted_data = None
|
extracted_data = None
|
||||||
if action.data_extraction_goal:
|
if action.data_extraction_goal:
|
||||||
|
|||||||
Reference in New Issue
Block a user