Verification code V2: support verification codes entered across multiple separate single-character input fields (#683)

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
This commit is contained in:
Kerem Yilmaz
2024-08-08 02:17:15 +03:00
committed by GitHub
parent 78adb8b276
commit c872b1e4a8
5 changed files with 94 additions and 50 deletions

View File

@@ -11,7 +11,6 @@ PAGE_CONTENT_TIMEOUT = 300 # 5 mins
# reserved fields for navigation payload # reserved fields for navigation payload
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"
VERIFICATION_CODE_PLACEHOLDER = "REAL_TOTP_CODE"
VERIFICATION_CODE_POLLING_TIMEOUT_MINS = 10 VERIFICATION_CODE_POLLING_TIMEOUT_MINS = 10

View File

@@ -11,12 +11,7 @@ from playwright._impl._errors import TargetClosedError
from playwright.async_api import Page from playwright.async_api import Page
from skyvern import analytics from skyvern import analytics
from skyvern.constants import ( from skyvern.constants import SCRAPE_TYPE_ORDER, SPECIAL_FIELD_VERIFICATION_CODE, ScrapeType
SCRAPE_TYPE_ORDER,
SPECIAL_FIELD_VERIFICATION_CODE,
VERIFICATION_CODE_PLACEHOLDER,
ScrapeType,
)
from skyvern.exceptions import ( from skyvern.exceptions import (
BrowserStateMissingPage, BrowserStateMissingPage,
EmptyScrapePage, EmptyScrapePage,
@@ -53,7 +48,7 @@ from skyvern.webeye.actions.actions import (
WebAction, WebAction,
parse_actions, parse_actions,
) )
from skyvern.webeye.actions.handler import ActionHandler from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code
from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
from skyvern.webeye.actions.responses import ActionResult from skyvern.webeye.actions.responses import ActionResult
from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.browser_factory import BrowserState
@@ -548,6 +543,13 @@ class ForgeAgent:
step=step, step=step,
screenshots=scraped_page.screenshots, screenshots=scraped_page.screenshots,
) )
json_response = await self.handle_potential_verification_code(
task,
step,
scraped_page,
browser_state,
json_response,
)
detailed_agent_step_output.llm_response = json_response detailed_agent_step_output.llm_response = json_response
actions = parse_actions(task, json_response["actions"]) actions = parse_actions(task, json_response["actions"])
@@ -951,16 +953,6 @@ class ForgeAgent:
num_elements=len(scraped_page.elements), num_elements=len(scraped_page.elements),
url=task.url, url=task.url,
) )
actions_and_results_str = await self._get_action_results(task)
# Generate the extract action prompt
navigation_goal = task.navigation_goal
starting_url = task.url
current_url = (
await browser_state.page.evaluate("() => document.location.href") if browser_state.page else starting_url
)
# TODO: we only use HTML element for now, introduce a way to switch in the future # TODO: we only use HTML element for now, introduce a way to switch in the future
element_tree_format = ElementTreeFormat.HTML element_tree_format = ElementTreeFormat.HTML
LOG.info( LOG.info(
@@ -971,18 +963,12 @@ class ForgeAgent:
) )
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format) element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
final_navigation_payload = self._build_navigation_payload(task) extract_action_prompt = await self._build_extract_action_prompt(
extract_action_prompt = prompt_engine.load_prompt( task,
"extract-action", browser_state,
navigation_goal=navigation_goal, element_tree_in_prompt,
navigation_payload_str=json.dumps(final_navigation_payload), verification_code_check=bool(task.totp_verification_url),
starting_url=starting_url, expire_verification_code=True,
current_url=current_url,
elements=element_tree_in_prompt,
data_extraction_goal=task.data_extraction_goal,
action_history=actions_and_results_str,
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
utc_datetime=datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
) )
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(
@@ -1013,26 +999,62 @@ class ForgeAgent:
return scraped_page, extract_action_prompt return scraped_page, extract_action_prompt
async def _build_extract_action_prompt(
    self,
    task: Task,
    browser_state: BrowserState,
    element_tree_in_prompt: str,
    verification_code_check: bool = False,
    expire_verification_code: bool = False,
) -> str:
    """Render the "extract-action" prompt for the given task.

    Args:
        task: Task supplying the navigation goal, starting URL, data
            extraction goal, error-code mapping and navigation payload.
        browser_state: Browser state; when it has a page, the page's
            ``document.location.href`` is used as the current URL,
            otherwise the task's starting URL is used.
        element_tree_in_prompt: Already-built element tree text passed to
            the template as ``elements``.
        verification_code_check: Passed through to the template as
            ``verification_code_check``.
        expire_verification_code: Passed through to
            ``_build_navigation_payload``.

    Returns:
        The prompt produced by ``prompt_engine.load_prompt``.
    """
    # Imported locally so this method does not depend on the file-level
    # import block also exposing `timezone`.
    from datetime import timezone

    actions_and_results_str = await self._get_action_results(task)

    # Generate the extract action prompt
    navigation_goal = task.navigation_goal
    starting_url = task.url
    current_url = (
        await browser_state.page.evaluate("() => document.location.href") if browser_state.page else starting_url
    )

    final_navigation_payload = self._build_navigation_payload(
        task, expire_verification_code=expire_verification_code
    )

    return prompt_engine.load_prompt(
        "extract-action",
        navigation_goal=navigation_goal,
        navigation_payload_str=json.dumps(final_navigation_payload),
        starting_url=starting_url,
        current_url=current_url,
        elements=element_tree_in_prompt,
        data_extraction_goal=task.data_extraction_goal,
        action_history=actions_and_results_str,
        error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
        # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC
        # datetime formats to the same "YYYY-MM-DD HH:MM" string.
        utc_datetime=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M"),
        verification_code_check=verification_code_check,
    )
def _build_navigation_payload( def _build_navigation_payload(
self, self,
task: Task, task: Task,
expire_verification_code: bool = False,
) -> dict[str, Any] | list | str | None: ) -> dict[str, Any] | list | str | None:
final_navigation_payload = task.navigation_payload final_navigation_payload = task.navigation_payload
if task.totp_verification_url: current_context = skyvern_context.ensure_context()
verification_code = current_context.totp_codes.get(task.task_id)
if task.totp_verification_url and verification_code:
if ( if (
isinstance(final_navigation_payload, dict) isinstance(final_navigation_payload, dict)
and SPECIAL_FIELD_VERIFICATION_CODE not in final_navigation_payload and SPECIAL_FIELD_VERIFICATION_CODE not in final_navigation_payload
): ):
final_navigation_payload[SPECIAL_FIELD_VERIFICATION_CODE] = VERIFICATION_CODE_PLACEHOLDER final_navigation_payload[SPECIAL_FIELD_VERIFICATION_CODE] = verification_code
elif ( elif (
isinstance(final_navigation_payload, str) isinstance(final_navigation_payload, str)
and SPECIAL_FIELD_VERIFICATION_CODE not in final_navigation_payload and SPECIAL_FIELD_VERIFICATION_CODE not in final_navigation_payload
): ):
final_navigation_payload = ( final_navigation_payload = (
final_navigation_payload final_navigation_payload + "\n" + str({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})
+ "\n"
+ str({SPECIAL_FIELD_VERIFICATION_CODE: VERIFICATION_CODE_PLACEHOLDER})
) )
if expire_verification_code:
current_context.totp_codes.pop(task.task_id)
return final_navigation_payload return final_navigation_payload
async def _get_action_results(self, task: Task) -> str: async def _get_action_results(self, task: Task) -> str:
@@ -1552,6 +1574,40 @@ class ForgeAgent:
) )
return None, None, next_step return None, None, next_step
async def handle_potential_verification_code(
    self,
    task: Task,
    step: Step,
    scraped_page: ScrapedPage,
    browser_state: BrowserState,
    json_response: dict[str, Any],
) -> dict[str, Any]:
    """Re-query the LLM with a verification code when the response asks for one.

    If ``json_response`` has a truthy ``"need_verification_code"`` entry and
    the task has both a ``totp_verification_url`` and an ``organization_id``,
    the code is polled, stored in the current Skyvern context under the
    task id, and a fresh extract-action prompt is sent to the LLM. Otherwise
    the incoming response is returned untouched.

    Args:
        task: Task being executed.
        step: Current step, used for logging and for the LLM call.
        scraped_page: Scraped page providing the element tree and screenshots.
        browser_state: Browser state forwarded to the prompt builder.
        json_response: The LLM response to inspect.

    Returns:
        Either the new LLM response or the original ``json_response``.
    """
    # TODO: handle verifications and resend the request if needed
    # parse the "need_verification_code" field from the response
    code_requested = json_response.get("need_verification_code")
    if not (code_requested and task.totp_verification_url and task.organization_id):
        return json_response

    LOG.info("Need verification code", step_id=step.step_id)
    polled_code = await poll_verification_code(
        task.task_id,
        task.organization_id,
        url=task.totp_verification_url,
    )
    # Store the code in the context, keyed by task id, before rebuilding the prompt.
    skyvern_context.ensure_context().totp_codes[task.task_id] = polled_code

    html_tree: str = scraped_page.build_element_tree(ElementTreeFormat.HTML)
    retry_prompt = await self._build_extract_action_prompt(
        task,
        browser_state,
        html_tree,
        verification_code_check=False,
        expire_verification_code=False,
    )
    retried_response = await app.LLM_API_HANDLER(
        prompt=retry_prompt,
        step=step,
        screenshots=scraped_page.screenshots,
    )
    return retried_response
@staticmethod @staticmethod
async def get_task_errors(task: Task) -> list[UserDefinedError]: async def get_task_errors(task: Task) -> list[UserDefinedError]:
steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id) steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)

View File

@@ -9,6 +9,7 @@ If you see a popup in the page screenshot, prioritize actions on the popup.
Reply in JSON format with the following keys: Reply in JSON format with the following keys:
{ {
{% if verification_code_check %} "need_verification_code": bool, // Whether a verification code is needed to proceed.{% endif %}
"user_goal_achieved": str, // A string that describes if user goal has been completed with reasoning. "user_goal_achieved": str, // A string that describes if user goal has been completed with reasoning.
"action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user goal has been achieved. "action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user goal has been achieved.
"actions": array // An array of actions. Here's the format of each action: "actions": array // An array of actions. Here's the format of each action:
@@ -36,10 +37,8 @@ Reply in JSON format with the following keys:
}], }],
} }
{% if action_history %} {% if action_history %}
Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations. Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations.
{% endif %} {% endif %}
Clickable elements from `{{ current_url }}`: Clickable elements from `{{ current_url }}`:
``` ```
{{ elements }} {{ elements }}
@@ -52,12 +51,10 @@ User goal:
{{ navigation_goal }} {{ navigation_goal }}
``` ```
{% if error_code_mapping_str %} {% if error_code_mapping_str %}
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors: Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
{{ error_code_mapping_str }} {{ error_code_mapping_str }}
{% endif %} {% endif %}
{% if data_extraction_goal %} {% if data_extraction_goal %}
User Data Extraction Goal: User Data Extraction Goal:
``` ```
{{ data_extraction_goal }} {{ data_extraction_goal }}
@@ -69,11 +66,9 @@ User details:
{{ navigation_payload_str }} {{ navigation_payload_str }}
``` ```
{% if action_history %} {% if action_history %}
Action results from previous steps: (note: even if the action history suggests goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved) Action results from previous steps: (note: even if the action history suggests goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved)
{{ action_history }} {{ action_history }}
{% endif %} {% endif %}
Current datetime in UTC, YYYY-MM-DD HH:MM format: Current datetime in UTC, YYYY-MM-DD HH:MM format:
``` ```
{{ utc_datetime }} {{ utc_datetime }}

View File

@@ -1,5 +1,5 @@
from contextvars import ContextVar from contextvars import ContextVar
from dataclasses import dataclass from dataclasses import dataclass, field
@dataclass @dataclass
@@ -10,6 +10,7 @@ class SkyvernContext:
workflow_id: str | None = None workflow_id: str | None = None
workflow_run_id: str | None = None workflow_run_id: str | None = None
max_steps_override: int | None = None max_steps_override: int | None = None
totp_codes: dict[str, str | None] = field(default_factory=dict)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})" return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})"

View File

@@ -10,7 +10,7 @@ import structlog
from deprecation import deprecated from deprecation import deprecated
from playwright.async_api import FileChooser, Locator, Page, TimeoutError from playwright.async_api import FileChooser, Locator, Page, TimeoutError
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_PLACEHOLDER, VERIFICATION_CODE_POLLING_TIMEOUT_MINS from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
from skyvern.exceptions import ( from skyvern.exceptions import (
EmptySelect, EmptySelect,
ErrFoundSelectableElement, ErrFoundSelectableElement,
@@ -711,13 +711,6 @@ async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) ->
This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url). This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url).
""" """
if task.totp_verification_url and task.organization_id and VERIFICATION_CODE_PLACEHOLDER == parameter:
# if parameter is the secret code in the navigation playload,
# fetch the real verification from totp_verification_url
# do polling every 10 seconds to fetch the verification code
verification_code = await poll_verification_code(task.task_id, task.organization_id, task.totp_verification_url)
return verification_code
if task.workflow_run_id is None: if task.workflow_run_id is None:
return parameter return parameter