Pedro/migrate context call to main prompt (#3400)

This commit is contained in:
Shuchang Zheng
2025-09-11 13:49:05 -07:00
committed by GitHub
parent 3d2b164ea8
commit b7515919d7
11 changed files with 104 additions and 9 deletions

View File

@@ -83,6 +83,7 @@ class Action(BaseModel):
download: bool | None = None
is_upload_file_tag: bool | None = None
text: str | None = None
input_or_select_context: InputOrSelectContext | None = None
option: SelectOption | None = None
is_checked: bool | None = None
verified: bool = False
@@ -165,7 +166,7 @@ class InputTextAction(WebAction):
text: str
def __repr__(self) -> str:
return f"InputTextAction(element_id={self.element_id}, text={self.text}, tool_call_id={self.tool_call_id})"
return f"InputTextAction(element_id={self.element_id}, text={self.text}, context={self.input_or_select_context}, tool_call_id={self.tool_call_id})"
class UploadFileAction(WebAction):
@@ -199,7 +200,7 @@ class SelectOptionAction(WebAction):
option: SelectOption
def __repr__(self) -> str:
return f"SelectOptionAction(element_id={self.element_id}, option={self.option})"
return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context})"
###

View File

@@ -784,9 +784,9 @@ async def handle_sequential_click_for_dropdown(
action=AbstractActionForContextParse(
reasoning=action.reasoning, intention=action.intention, element_id=action.element_id
),
step=step,
element_tree_builder=scraped_page,
skyvern_element=anchor_element,
element_tree_builder=scraped_page,
step=step,
)
if dropdown_select_context.is_date_related:
@@ -3425,9 +3425,20 @@ async def normal_select(
action_result: List[ActionResult] = []
is_success = False
locator = skyvern_element.get_locator()
input_or_select_context = await _get_input_or_select_context(
action=action, element_tree_builder=builder, step=step, skyvern_element=skyvern_element
action=action,
element_tree_builder=builder,
step=step,
skyvern_element=skyvern_element,
)
LOG.info(
"Parsed input/select context",
context=input_or_select_context,
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.refresh_select_options()
options_html = skyvern_element.build_HTML()
field_information = (
@@ -3766,6 +3777,11 @@ async def _get_input_or_select_context(
step: Step,
ancestor_depth: int = 5,
) -> InputOrSelectContext:
# Early return: when the action is not an AbstractActionForContextParse and already carries an input_or_select_context, reuse it instead of recomputing it
if not isinstance(action, AbstractActionForContextParse) and action.input_or_select_context is not None:
return action.input_or_select_context
# Ancestor depth optimization: use ancestor element for deep DOM structures
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
try:
depth = await skyvern_frame.get_element_dom_depth(await skyvern_element.get_element_handler())

View File

@@ -21,6 +21,7 @@ from skyvern.webeye.actions.actions import (
CompleteAction,
DownloadFileAction,
DragAction,
InputOrSelectContext,
InputTextAction,
KeypressAction,
LeftMouseAction,
@@ -68,6 +69,7 @@ def parse_action(action: Dict[str, Any], scraped_page: ScrapedPage, data_extract
"intention": intention,
"response": response,
}
input_or_select_context: InputOrSelectContext | None = None
if "action_type" not in action or action["action_type"] is None:
return NullAction(**base_action_dict)
@@ -89,7 +91,11 @@ def parse_action(action: Dict[str, Any], scraped_page: ScrapedPage, data_extract
return ClickAction(**base_action_dict, file_url=file_url, download=action.get("download", False))
if action_type == ActionType.INPUT_TEXT:
return InputTextAction(**base_action_dict, text=action["text"])
context_dict = action.get("context", {})
if context_dict and len(context_dict) > 0:
context_dict["intention"] = intention
input_or_select_context = InputOrSelectContext.model_validate(context_dict)
return InputTextAction(**base_action_dict, text=action["text"], input_or_select_context=input_or_select_context)
if action_type == ActionType.UPLOAD_FILE:
# TODO: check whether the element is a file input element. If it's not, convert this action into a click action.
@@ -106,6 +112,12 @@ def parse_action(action: Dict[str, Any], scraped_page: ScrapedPage, data_extract
option = action["option"]
if option is None:
raise ValueError("SelectOptionAction requires an 'option' field")
context_dict = action.get("context", {})
if context_dict and len(context_dict) > 0:
context_dict["intention"] = intention
input_or_select_context = InputOrSelectContext.model_validate(context_dict)
label = option.get("label")
value = option.get("value")
index = option.get("index")
@@ -118,6 +130,7 @@ def parse_action(action: Dict[str, Any], scraped_page: ScrapedPage, data_extract
value=value,
index=index,
),
input_or_select_context=input_or_select_context,
)
if action_type == ActionType.CHECKBOX: