iframes support (#405)
Co-authored-by: Aleksei Zarubin <12220926+alexzarbn@users.noreply.github.com>
This commit is contained in:
@@ -818,6 +818,11 @@ class ForgeAgent:
|
|||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP,
|
artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP,
|
||||||
data=json.dumps(scraped_page.id_to_xpath_dict, indent=2).encode(),
|
data=json.dumps(scraped_page.id_to_xpath_dict, indent=2).encode(),
|
||||||
)
|
)
|
||||||
|
await app.ARTIFACT_MANAGER.create_artifact(
|
||||||
|
step=step,
|
||||||
|
artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_FRAME_MAP,
|
||||||
|
data=json.dumps(scraped_page.id_to_frame_dict, indent=2).encode(),
|
||||||
|
)
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
await app.ARTIFACT_MANAGER.create_artifact(
|
||||||
step=step,
|
step=step,
|
||||||
artifact_type=ArtifactType.VISIBLE_ELEMENTS_TREE,
|
artifact_type=ArtifactType.VISIBLE_ELEMENTS_TREE,
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ class ArtifactType(StrEnum):
|
|||||||
LLM_RESPONSE = "llm_response"
|
LLM_RESPONSE = "llm_response"
|
||||||
LLM_RESPONSE_PARSED = "llm_response_parsed"
|
LLM_RESPONSE_PARSED = "llm_response_parsed"
|
||||||
VISIBLE_ELEMENTS_ID_XPATH_MAP = "visible_elements_id_xpath_map"
|
VISIBLE_ELEMENTS_ID_XPATH_MAP = "visible_elements_id_xpath_map"
|
||||||
|
VISIBLE_ELEMENTS_ID_FRAME_MAP = "visible_elements_id_frame_map"
|
||||||
VISIBLE_ELEMENTS_TREE = "visible_elements_tree"
|
VISIBLE_ELEMENTS_TREE = "visible_elements_tree"
|
||||||
VISIBLE_ELEMENTS_TREE_TRIMMED = "visible_elements_tree_trimmed"
|
VISIBLE_ELEMENTS_TREE_TRIMMED = "visible_elements_tree_trimmed"
|
||||||
VISIBLE_ELEMENTS_TREE_IN_PROMPT = "visible_elements_tree_in_prompt"
|
VISIBLE_ELEMENTS_TREE_IN_PROMPT = "visible_elements_tree_in_prompt"
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ FILE_EXTENTSION_MAP: dict[ArtifactType, str] = {
|
|||||||
ArtifactType.LLM_RESPONSE: "json",
|
ArtifactType.LLM_RESPONSE: "json",
|
||||||
ArtifactType.LLM_RESPONSE_PARSED: "json",
|
ArtifactType.LLM_RESPONSE_PARSED: "json",
|
||||||
ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP: "json",
|
ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP: "json",
|
||||||
|
ArtifactType.VISIBLE_ELEMENTS_ID_FRAME_MAP: "json",
|
||||||
ArtifactType.VISIBLE_ELEMENTS_TREE: "json",
|
ArtifactType.VISIBLE_ELEMENTS_TREE: "json",
|
||||||
ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED: "json",
|
ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED: "json",
|
||||||
ArtifactType.VISIBLE_ELEMENTS_TREE_IN_PROMPT: "txt",
|
ArtifactType.VISIBLE_ELEMENTS_TREE_IN_PROMPT: "txt",
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ from typing import Any, Awaitable, Callable, List
|
|||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
from deprecation import deprecated
|
from deprecation import deprecated
|
||||||
from playwright.async_api import Locator, Page
|
from playwright.async_api import FrameLocator, Locator, Page
|
||||||
|
|
||||||
from skyvern.constants import REPO_ROOT_DIR
|
from skyvern.constants import REPO_ROOT_DIR, SKYVERN_ID_ATTR
|
||||||
from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
|
from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound, SkyvernException
|
||||||
from skyvern.forge import app
|
from skyvern.forge import app
|
||||||
from skyvern.forge.prompts import prompt_engine
|
from skyvern.forge.prompts import prompt_engine
|
||||||
from skyvern.forge.sdk.api.files import (
|
from skyvern.forge.sdk.api.files import (
|
||||||
@@ -175,16 +175,18 @@ async def handle_click_action(
|
|||||||
num_downloaded_files_before=num_downloaded_files_before,
|
num_downloaded_files_before=num_downloaded_files_before,
|
||||||
download_dir=download_dir,
|
download_dir=download_dir,
|
||||||
)
|
)
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.3)
|
||||||
if action.download:
|
if action.download:
|
||||||
results = await handle_click_to_download_file_action(action, page, scraped_page)
|
results = await handle_click_to_download_file_action(action, page, scraped_page)
|
||||||
else:
|
else:
|
||||||
results = await chain_click(
|
results = await chain_click(
|
||||||
task,
|
task,
|
||||||
|
scraped_page,
|
||||||
page,
|
page,
|
||||||
action,
|
action,
|
||||||
xpath,
|
xpath,
|
||||||
|
frame,
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -208,10 +210,12 @@ async def handle_click_to_download_file_action(
|
|||||||
page: Page,
|
page: Page,
|
||||||
scraped_page: ScrapedPage,
|
scraped_page: ScrapedPage,
|
||||||
) -> list[ActionResult]:
|
) -> list[ActionResult]:
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
|
|
||||||
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await page.click(
|
await locator.click(
|
||||||
f"xpath={xpath}",
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
modifiers=["Alt"],
|
modifiers=["Alt"],
|
||||||
)
|
)
|
||||||
@@ -229,8 +233,9 @@ async def handle_input_text_action(
|
|||||||
task: Task,
|
task: Task,
|
||||||
step: Step,
|
step: Step,
|
||||||
) -> list[ActionResult]:
|
) -> list[ActionResult]:
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
locator = page.locator(f"xpath={xpath}")
|
|
||||||
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
current_text = await locator.input_value()
|
current_text = await locator.input_value()
|
||||||
if current_text == action.text:
|
if current_text == action.text:
|
||||||
@@ -269,20 +274,28 @@ async def handle_upload_file_action(
|
|||||||
file_url=action.file_url,
|
file_url=action.file_url,
|
||||||
)
|
)
|
||||||
return [ActionFailure(ImaginaryFileUrl(action.file_url))]
|
return [ActionFailure(ImaginaryFileUrl(action.file_url))]
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
|
||||||
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
|
|
||||||
file_path = await download_file(file_url)
|
file_path = await download_file(file_url)
|
||||||
locator = page.locator(f"xpath={xpath}")
|
|
||||||
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
is_file_input = await is_file_input_element(locator)
|
is_file_input = await is_file_input_element(locator)
|
||||||
|
|
||||||
if is_file_input:
|
if is_file_input:
|
||||||
LOG.info("Taking UploadFileAction. Found file input tag", action=action)
|
LOG.info("Taking UploadFileAction. Found file input tag", action=action)
|
||||||
if file_path:
|
if file_path:
|
||||||
await page.locator(f"xpath={xpath}").set_input_files(
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
|
await locator.set_input_files(
|
||||||
file_path,
|
file_path,
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Sleep for 10 seconds after uploading a file to let the page process it
|
# Sleep for 10 seconds after uploading a file to let the page process it
|
||||||
await asyncio.sleep(10)
|
await asyncio.sleep(10)
|
||||||
|
|
||||||
return [ActionSuccess()]
|
return [ActionSuccess()]
|
||||||
else:
|
else:
|
||||||
return [ActionFailure(Exception(f"Failed to download file from {action.file_url}"))]
|
return [ActionFailure(Exception(f"Failed to download file from {action.file_url}"))]
|
||||||
@@ -292,9 +305,11 @@ async def handle_upload_file_action(
|
|||||||
action.is_upload_file_tag = False
|
action.is_upload_file_tag = False
|
||||||
return await chain_click(
|
return await chain_click(
|
||||||
task,
|
task,
|
||||||
|
scraped_page,
|
||||||
page,
|
page,
|
||||||
action,
|
action,
|
||||||
xpath,
|
xpath,
|
||||||
|
frame,
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -307,15 +322,17 @@ async def handle_download_file_action(
|
|||||||
task: Task,
|
task: Task,
|
||||||
step: Step,
|
step: Step,
|
||||||
) -> list[ActionResult]:
|
) -> list[ActionResult]:
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
file_name = f"{action.file_name or uuid.uuid4()}"
|
file_name = f"{action.file_name or uuid.uuid4()}"
|
||||||
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
||||||
try:
|
try:
|
||||||
# Start waiting for the download
|
# Start waiting for the download
|
||||||
async with page.expect_download() as download_info:
|
async with page.expect_download() as download_info:
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.3)
|
||||||
await page.click(
|
|
||||||
f"xpath={xpath}",
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
|
await locator.click(
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
modifiers=["Alt"],
|
modifiers=["Alt"],
|
||||||
)
|
)
|
||||||
@@ -355,9 +372,10 @@ async def handle_select_option_action(
|
|||||||
task: Task,
|
task: Task,
|
||||||
step: Step,
|
step: Step,
|
||||||
) -> list[ActionResult]:
|
) -> list[ActionResult]:
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
|
|
||||||
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
locator = page.locator(f"xpath={xpath}")
|
|
||||||
tag_name = await get_tag_name_lowercase(locator)
|
tag_name = await get_tag_name_lowercase(locator)
|
||||||
element_dict = scraped_page.id_to_element_dict[action.element_id]
|
element_dict = scraped_page.id_to_element_dict[action.element_id]
|
||||||
LOG.info(
|
LOG.info(
|
||||||
@@ -400,7 +418,7 @@ async def handle_select_option_action(
|
|||||||
child_anchor_xpath=child_anchor_xpath,
|
child_anchor_xpath=child_anchor_xpath,
|
||||||
)
|
)
|
||||||
click_action = ClickAction(element_id=action.element_id)
|
click_action = ClickAction(element_id=action.element_id)
|
||||||
return await chain_click(task, page, click_action, child_anchor_xpath)
|
return await chain_click(task, scraped_page, page, click_action, child_anchor_xpath, frame)
|
||||||
|
|
||||||
# handler the select action on <label>
|
# handler the select action on <label>
|
||||||
select_element_id = get_select_id_in_label_children(scraped_page, action.element_id)
|
select_element_id = get_select_id_in_label_children(scraped_page, action.element_id)
|
||||||
@@ -432,7 +450,7 @@ async def handle_select_option_action(
|
|||||||
action=action,
|
action=action,
|
||||||
)
|
)
|
||||||
click_action = ClickAction(element_id=action.element_id)
|
click_action = ClickAction(element_id=action.element_id)
|
||||||
action_result = await chain_click(task, page, click_action, xpath)
|
action_result = await chain_click(task, scraped_page, page, click_action, xpath, frame)
|
||||||
return action_result
|
return action_result
|
||||||
elif tag_name == "ul" or tag_name == "div" or tag_name == "li":
|
elif tag_name == "ul" or tag_name == "div" or tag_name == "li":
|
||||||
# if the role is listbox, find the option with the "label" or "value" and click that option element
|
# if the role is listbox, find the option with the "label" or "value" and click that option element
|
||||||
@@ -464,7 +482,7 @@ async def handle_select_option_action(
|
|||||||
)
|
)
|
||||||
# click the option element
|
# click the option element
|
||||||
click_action = ClickAction(element_id=action.element_id)
|
click_action = ClickAction(element_id=action.element_id)
|
||||||
return await chain_click(task, page, click_action, xpath)
|
return await chain_click(task, scraped_page, page, click_action, xpath, frame)
|
||||||
else:
|
else:
|
||||||
LOG.error(
|
LOG.error(
|
||||||
"SelectOptionAction on a non-listbox element. Cannot handle this action",
|
"SelectOptionAction on a non-listbox element. Cannot handle this action",
|
||||||
@@ -481,19 +499,17 @@ async def handle_select_option_action(
|
|||||||
current_text = await locator.input_value()
|
current_text = await locator.input_value()
|
||||||
if current_text == action.option.label:
|
if current_text == action.option.label:
|
||||||
return [ActionSuccess()]
|
return [ActionSuccess()]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# First click by label (if it matches)
|
# First click by label (if it matches)
|
||||||
await page.click(
|
await locator.click(
|
||||||
f"xpath={xpath}",
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
await page.select_option(
|
await locator.select_option(
|
||||||
xpath,
|
|
||||||
label=action.option.label,
|
label=action.option.label,
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
await page.click(
|
await locator.click(
|
||||||
f"xpath={xpath}",
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
return [ActionSuccess()]
|
return [ActionSuccess()]
|
||||||
@@ -536,7 +552,7 @@ async def handle_select_option_action(
|
|||||||
|
|
||||||
|
|
||||||
async def handle_checkbox_action(
|
async def handle_checkbox_action(
|
||||||
self: actions.CheckboxAction,
|
action: actions.CheckboxAction,
|
||||||
page: Page,
|
page: Page,
|
||||||
scraped_page: ScrapedPage,
|
scraped_page: ScrapedPage,
|
||||||
task: Task,
|
task: Task,
|
||||||
@@ -549,11 +565,14 @@ async def handle_checkbox_action(
|
|||||||
Treating checkbox actions as click actions seem to perform way more reliably
|
Treating checkbox actions as click actions seem to perform way more reliably
|
||||||
Developers who tried this and failed: 2 (Suchintan and Shu 😂)
|
Developers who tried this and failed: 2 (Suchintan and Shu 😂)
|
||||||
"""
|
"""
|
||||||
xpath = await validate_actions_in_dom(self, page, scraped_page)
|
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
if self.is_checked:
|
|
||||||
await page.check(xpath, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
|
if action.is_checked:
|
||||||
|
await locator.check(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
|
||||||
else:
|
else:
|
||||||
await page.uncheck(xpath, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
|
await locator.uncheck(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
|
||||||
|
|
||||||
# TODO (suchintan): Why does checking the label work, but not the actual input element?
|
# TODO (suchintan): Why does checking the label work, but not the actual input element?
|
||||||
return [ActionSuccess()]
|
return [ActionSuccess()]
|
||||||
@@ -630,9 +649,11 @@ def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any:
|
|||||||
return secret_value if secret_value is not None else parameter
|
return secret_value if secret_value is not None else parameter
|
||||||
|
|
||||||
|
|
||||||
async def validate_actions_in_dom(action: WebAction, page: Page, scraped_page: ScrapedPage) -> str:
|
async def validate_actions_in_dom(action: WebAction, page: Page, scraped_page: ScrapedPage) -> tuple[str, str]:
|
||||||
xpath = scraped_page.id_to_xpath_dict[action.element_id]
|
xpath = scraped_page.id_to_xpath_dict[action.element_id]
|
||||||
locator = page.locator(xpath)
|
frame = scraped_page.id_to_frame_dict[action.element_id]
|
||||||
|
|
||||||
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
|
||||||
num_elements = await locator.count()
|
num_elements = await locator.count()
|
||||||
if num_elements < 1:
|
if num_elements < 1:
|
||||||
@@ -652,14 +673,16 @@ async def validate_actions_in_dom(action: WebAction, page: Page, scraped_page: S
|
|||||||
else:
|
else:
|
||||||
LOG.info("Validated action xpath in DOM", action=action)
|
LOG.info("Validated action xpath in DOM", action=action)
|
||||||
|
|
||||||
return xpath
|
return xpath, frame
|
||||||
|
|
||||||
|
|
||||||
async def chain_click(
|
async def chain_click(
|
||||||
task: Task,
|
task: Task,
|
||||||
|
scraped_page: ScrapedPage,
|
||||||
page: Page,
|
page: Page,
|
||||||
action: ClickAction | UploadFileAction,
|
action: ClickAction | UploadFileAction,
|
||||||
xpath: str,
|
xpath: str,
|
||||||
|
frame: str,
|
||||||
timeout: int = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout: int = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
) -> List[ActionResult]:
|
) -> List[ActionResult]:
|
||||||
# Add a defensive page handler here in case a click action opens a file chooser.
|
# Add a defensive page handler here in case a click action opens a file chooser.
|
||||||
@@ -689,9 +712,11 @@ async def chain_click(
|
|||||||
Clicks on an element identified by the xpath and its parent if failed.
|
Clicks on an element identified by the xpath and its parent if failed.
|
||||||
:param xpath: xpath of the element to click
|
:param xpath: xpath of the element to click
|
||||||
"""
|
"""
|
||||||
javascript_triggered = await is_javascript_triggered(page, xpath)
|
javascript_triggered = await is_javascript_triggered(scraped_page, page, frame, xpath)
|
||||||
try:
|
try:
|
||||||
await page.click(f"xpath={xpath}", timeout=timeout)
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
|
await locator.click(timeout=timeout)
|
||||||
|
|
||||||
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath)
|
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath)
|
||||||
return [
|
return [
|
||||||
ActionSuccess(
|
ActionSuccess(
|
||||||
@@ -718,10 +743,12 @@ async def chain_click(
|
|||||||
|
|
||||||
parent_xpath = f"{xpath}/.."
|
parent_xpath = f"{xpath}/.."
|
||||||
try:
|
try:
|
||||||
parent_javascript_triggered = await is_javascript_triggered(page, parent_xpath)
|
parent_javascript_triggered = await is_javascript_triggered(scraped_page, page, frame, parent_xpath)
|
||||||
javascript_triggered = javascript_triggered or parent_javascript_triggered
|
javascript_triggered = javascript_triggered or parent_javascript_triggered
|
||||||
parent_locator = page.locator(xpath).locator("..")
|
|
||||||
|
parent_locator = resolve_locator(scraped_page, page, frame, xpath).locator("..")
|
||||||
await parent_locator.click(timeout=timeout)
|
await parent_locator.click(timeout=timeout)
|
||||||
|
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Chain click: successfully clicked parent element",
|
"Chain click: successfully clicked parent element",
|
||||||
action=action,
|
action=action,
|
||||||
@@ -806,9 +833,10 @@ def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: str
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def is_javascript_triggered(page: Page, xpath: str) -> bool:
|
async def is_javascript_triggered(scraped_page: ScrapedPage, page: Page, frame: str, xpath: str) -> bool:
|
||||||
locator = page.locator(f"xpath={xpath}")
|
locator = resolve_locator(scraped_page, page, frame, xpath)
|
||||||
element = locator.first
|
element = locator.first
|
||||||
|
|
||||||
tag_name = await element.evaluate("e => e.tagName")
|
tag_name = await element.evaluate("e => e.tagName")
|
||||||
if tag_name.lower() == "a":
|
if tag_name.lower() == "a":
|
||||||
href = await element.evaluate("e => e.href")
|
href = await element.evaluate("e => e.href")
|
||||||
@@ -928,8 +956,13 @@ async def click_listbox_option(
|
|||||||
text = child["text"] if "text" in child else ""
|
text = child["text"] if "text" in child else ""
|
||||||
if text and (text == action.option.label or text == action.option.value):
|
if text and (text == action.option.label or text == action.option.value):
|
||||||
option_xpath = scraped_page.id_to_xpath_dict[child["id"]]
|
option_xpath = scraped_page.id_to_xpath_dict[child["id"]]
|
||||||
|
option_frame = scraped_page.id_to_frame_dict[child["id"]]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await page.click(f"xpath={option_xpath}", timeout=1000)
|
locator = resolve_locator(scraped_page, page, option_frame, option_xpath)
|
||||||
|
|
||||||
|
await locator.click(timeout=1000)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.error(
|
LOG.error(
|
||||||
@@ -941,3 +974,28 @@ async def click_listbox_option(
|
|||||||
if "children" in child:
|
if "children" in child:
|
||||||
bfs_queue.extend(child["children"])
|
bfs_queue.extend(child["children"])
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_locator(scrape_page: ScrapedPage, page: Page, frame: str, xpath: str) -> Locator:
    """
    Build a locator for ``xpath`` scoped to the (possibly nested) iframe identified by ``frame``.

    The chain of enclosing iframes is discovered by walking ``id_to_element_dict`` from ``frame``
    up to the top-level document ("main.frame"), and is then replayed outermost-first as nested
    ``frame_locator`` calls starting from ``page``.

    :param scrape_page: Scraped page whose ``id_to_element_dict`` maps an iframe id to its element dict.
    :param page: Page the returned locator is rooted at.
    :param frame: Id of the frame that contains the element, or "main.frame" for the top-level document.
    :param xpath: XPath of the element inside that frame.
    :return: Locator for ``xpath`` inside the resolved frame.
    :raises MissingElement: If a frame id in the chain has no entry in ``id_to_element_dict``.
    :raises SkyvernException: If a frame element records no parent frame, or the chain loops back on itself.
    """
    # Frame ids collected innermost-first while walking up to the main frame.
    iframe_path: list[str] = []
    # Guards against a malformed frame map (e.g. a frame listing itself as its parent),
    # which would otherwise keep this loop running forever.
    visited_frames: set[str] = set()

    while frame != "main.frame":
        if frame in visited_frames:
            raise SkyvernException(f"cyclic frame chain detected at frame: {frame}")
        visited_frames.add(frame)
        iframe_path.append(frame)

        frame_element = scrape_page.id_to_element_dict.get(frame)
        if frame_element is None:
            raise MissingElement(element_id=frame)

        parent_frame = frame_element.get("frame")
        if not parent_frame:
            raise SkyvernException(f"element without frame: {frame_element}")

        LOG.info(f"{frame} is a child frame of {parent_frame}")
        frame = parent_frame

    # Descend from the page through each iframe, outermost first.
    current_page: Page | FrameLocator = page
    for child_frame in reversed(iframe_path):
        current_page = current_page.frame_locator(f"[{SKYVERN_ID_ATTR}='{child_frame}']")

    return current_page.locator(f"xpath={xpath}")
|
||||||
|
|||||||
@@ -342,6 +342,10 @@ function isInteractable(element) {
|
|||||||
|
|
||||||
const tagName = element.tagName.toLowerCase();
|
const tagName = element.tagName.toLowerCase();
|
||||||
|
|
||||||
|
if (tagName === "iframe") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (tagName === "a" && element.href) {
|
if (tagName === "a" && element.href) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -576,7 +580,7 @@ function uniqueId() {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildTreeFromBody() {
|
function buildTreeFromBody(frame = "main.frame") {
|
||||||
var elements = [];
|
var elements = [];
|
||||||
var resultArray = [];
|
var resultArray = [];
|
||||||
|
|
||||||
@@ -679,6 +683,7 @@ function buildTreeFromBody() {
|
|||||||
|
|
||||||
let elementObj = {
|
let elementObj = {
|
||||||
id: element_id,
|
id: element_id,
|
||||||
|
frame: frame,
|
||||||
interactable: interactable,
|
interactable: interactable,
|
||||||
tagName: elementTagNameLower,
|
tagName: elementTagNameLower,
|
||||||
attributes: attrs,
|
attributes: attrs,
|
||||||
@@ -760,6 +765,11 @@ function buildTreeFromBody() {
|
|||||||
processElement(child, elementObj.id);
|
processElement(child, elementObj.id);
|
||||||
});
|
});
|
||||||
return elementObj;
|
return elementObj;
|
||||||
|
} else if (element.tagName.toLowerCase() === "iframe") {
|
||||||
|
let iframeElementObject = buildElementObject(element, false);
|
||||||
|
|
||||||
|
elements.push(iframeElementObject);
|
||||||
|
resultArray.push(iframeElementObject);
|
||||||
} else {
|
} else {
|
||||||
// For a non-interactable element, if it has direct text, we also tagged
|
// For a non-interactable element, if it has direct text, we also tagged
|
||||||
// it with unique_id, but with interatable=false in the element.
|
// it with unique_id, but with interatable=false in the element.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from enum import StrEnum
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
from playwright.async_api import Page
|
from playwright.async_api import Frame, Page
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
|
from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||||
@@ -122,6 +122,7 @@ class ScrapedPage(BaseModel):
|
|||||||
|
|
||||||
elements: list[dict]
|
elements: list[dict]
|
||||||
id_to_element_dict: dict[str, dict] = {}
|
id_to_element_dict: dict[str, dict] = {}
|
||||||
|
id_to_frame_dict: dict[str, str] = {}
|
||||||
id_to_xpath_dict: dict[str, str]
|
id_to_xpath_dict: dict[str, str]
|
||||||
element_tree: list[dict]
|
element_tree: list[dict]
|
||||||
element_tree_trimmed: list[dict]
|
element_tree_trimmed: list[dict]
|
||||||
@@ -187,14 +188,30 @@ async def scrape_website(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def get_all_visible_text(page: Page) -> str:
|
async def get_frame_text(iframe: Frame) -> str:
    """
    Get all the visible text in the frame and, recursively, in its attached child frames.

    The text of each frame is kept on its own line so that the last word of a parent frame
    does not run into the first word of a child frame.

    :param iframe: Frame instance to get the text from.
    :return: The frame's ``document.body.innerText`` followed by the text of its child frames,
        joined with newlines. Returns an empty string if the text of ``iframe`` cannot be read.
    """
    js_script = "() => document.body.innerText"

    try:
        text = await iframe.evaluate(js_script)
    except Exception:
        # Best effort: a frame that cannot be evaluated contributes no text.
        LOG.warning(
            "failed to get text from iframe",
            exc_info=True,
        )
        return ""

    # Skip empty results so the join below does not produce blank separator lines.
    parts: list[str] = [text] if text else []

    for child_frame in iframe.child_frames:
        if child_frame.is_detached():
            continue

        child_text = await get_frame_text(child_frame)
        if child_text:
            parts.append(child_text)

    return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
async def scrape_web_unsafe(
|
async def scrape_web_unsafe(
|
||||||
@@ -256,17 +273,22 @@ async def scrape_web_unsafe(
|
|||||||
|
|
||||||
id_to_xpath_dict = {}
|
id_to_xpath_dict = {}
|
||||||
id_to_element_dict = {}
|
id_to_element_dict = {}
|
||||||
|
id_to_frame_dict = {}
|
||||||
|
|
||||||
for element in elements:
|
for element in elements:
|
||||||
element_id = element["id"]
|
element_id = element["id"]
|
||||||
# get_interactable_element_tree marks each interactable element with a unique_id attribute
|
# get_interactable_element_tree marks each interactable element with a unique_id attribute
|
||||||
id_to_xpath_dict[element_id] = f"//*[@{SKYVERN_ID_ATTR}='{element_id}']"
|
id_to_xpath_dict[element_id] = f"//*[@{SKYVERN_ID_ATTR}='{element_id}']"
|
||||||
id_to_element_dict[element_id] = element
|
id_to_element_dict[element_id] = element
|
||||||
|
id_to_frame_dict[element_id] = element["frame"]
|
||||||
|
|
||||||
|
text_content = await get_frame_text(page.main_frame)
|
||||||
|
|
||||||
text_content = await get_all_visible_text(page)
|
|
||||||
return ScrapedPage(
|
return ScrapedPage(
|
||||||
elements=elements,
|
elements=elements,
|
||||||
id_to_xpath_dict=id_to_xpath_dict,
|
id_to_xpath_dict=id_to_xpath_dict,
|
||||||
id_to_element_dict=id_to_element_dict,
|
id_to_element_dict=id_to_element_dict,
|
||||||
|
id_to_frame_dict=id_to_frame_dict,
|
||||||
element_tree=element_tree,
|
element_tree=element_tree,
|
||||||
element_tree_trimmed=trim_element_tree(copy.deepcopy(element_tree)),
|
element_tree_trimmed=trim_element_tree(copy.deepcopy(element_tree)),
|
||||||
screenshots=screenshots,
|
screenshots=screenshots,
|
||||||
@@ -276,6 +298,47 @@ async def scrape_web_unsafe(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_interactable_element_tree_in_frame(
    frames: list[Frame], elements: list[dict], element_tree: list[dict]
) -> tuple[list[dict], list[dict]]:
    """
    Scrape every attached frame in ``frames`` (recursing into nested frames) and merge the results.

    For each frame, the id stored on its iframe element is passed to ``buildTreeFromBody`` so that
    the elements scraped inside the frame are tagged with the frame they belong to. The matching
    iframe entry in ``elements`` / ``element_tree`` (top-level items only) gets the frame's results
    attached under "children", and the frame's elements are appended to the flat element list.

    :param frames: Frames to scrape; detached frames are skipped.
    :param elements: Flat list of element dicts scraped so far. Matching dicts are updated in place.
    :param element_tree: Element tree scraped so far. Matching top-level items are updated in place.
    :return: Tuple of the extended flat element list and the element tree.
    """
    for frame in frames:
        if frame.is_detached():
            continue

        try:
            frame_element = await frame.frame_element()
        except Exception:
            LOG.warning(
                "Unable to get frame_element",
                exc_info=True,
            )
            continue

        # Use the shared constant so this lookup stays in sync with the xpath / frame locators
        # that are built from the same attribute.
        unique_id = await frame_element.get_attribute(SKYVERN_ID_ATTR)
        if not unique_id:
            # Without an id the frame's elements would be tagged with an unresolvable frame
            # and could never be attached to the tree, so skip the frame entirely.
            LOG.warning(
                "Skipping frame whose iframe element has no id attribute",
                frame_url=frame.url,
            )
            continue

        await frame.evaluate(JS_FUNCTION_DEFS)
        # Pass the id as an argument instead of interpolating it into the script source.
        frame_elements, frame_element_tree = await frame.evaluate(
            "(frameId) => buildTreeFromBody(frameId)",
            unique_id,
        )

        if frame.child_frames:
            frame_elements, frame_element_tree = await get_interactable_element_tree_in_frame(
                frame.child_frames, frame_elements, frame_element_tree
            )

        for element in elements:
            if element["id"] == unique_id:
                element["children"] = frame_elements

        for element_tree_item in element_tree:
            if element_tree_item["id"] == unique_id:
                element_tree_item["children"] = frame_element_tree

        # Rebind rather than extend in place so the caller's list object is left untouched.
        elements = elements + frame_elements

    return elements, element_tree
|
||||||
|
|
||||||
|
|
||||||
async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[dict]]:
|
async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[dict]]:
|
||||||
"""
|
"""
|
||||||
Get the element tree of the page, including all the elements that are interactable.
|
Get the element tree of the page, including all the elements that are interactable.
|
||||||
@@ -283,8 +346,14 @@ async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[di
|
|||||||
:return: Tuple containing the element tree and a map of element IDs to elements.
|
:return: Tuple containing the element tree and a map of element IDs to elements.
|
||||||
"""
|
"""
|
||||||
await page.evaluate(JS_FUNCTION_DEFS)
|
await page.evaluate(JS_FUNCTION_DEFS)
|
||||||
js_script = "() => buildTreeFromBody()"
|
main_frame_js_script = "() => buildTreeFromBody('main.frame')"
|
||||||
elements, element_tree = await page.evaluate(js_script)
|
elements, element_tree = await page.evaluate(main_frame_js_script)
|
||||||
|
|
||||||
|
if len(page.main_frame.child_frames) > 0:
|
||||||
|
elements, element_tree = await get_interactable_element_tree_in_frame(
|
||||||
|
page.main_frame.child_frames, elements, element_tree
|
||||||
|
)
|
||||||
|
|
||||||
return elements, element_tree
|
return elements, element_tree
|
||||||
|
|
||||||
|
|
||||||
@@ -352,6 +421,9 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:
|
|||||||
queue.append(element)
|
queue.append(element)
|
||||||
while queue:
|
while queue:
|
||||||
queue_ele = queue.pop(0)
|
queue_ele = queue.pop(0)
|
||||||
|
if "frame" in queue_ele:
|
||||||
|
del queue_ele["frame"]
|
||||||
|
|
||||||
if "attributes" in queue_ele:
|
if "attributes" in queue_ele:
|
||||||
tag_name = queue_ele["tagName"] if "tagName" in queue_ele else ""
|
tag_name = queue_ele["tagName"] if "tagName" in queue_ele else ""
|
||||||
new_attributes = _trimmed_attributes(tag_name, queue_ele["attributes"])
|
new_attributes = _trimmed_attributes(tag_name, queue_ele["attributes"])
|
||||||
|
|||||||
@@ -308,6 +308,7 @@ with visualizer_tab:
|
|||||||
tab_screenshot,
|
tab_screenshot,
|
||||||
tab_post_action_screenshot,
|
tab_post_action_screenshot,
|
||||||
tab_id_to_xpath,
|
tab_id_to_xpath,
|
||||||
|
tab_id_to_frame,
|
||||||
tab_element_tree,
|
tab_element_tree,
|
||||||
tab_element_tree_trimmed,
|
tab_element_tree_trimmed,
|
||||||
tab_llm_prompt,
|
tab_llm_prompt,
|
||||||
@@ -323,6 +324,7 @@ with visualizer_tab:
|
|||||||
":rainbow[Screenshot]",
|
":rainbow[Screenshot]",
|
||||||
":rainbow[Action Screenshots]",
|
":rainbow[Action Screenshots]",
|
||||||
":red[ID -> XPath]",
|
":red[ID -> XPath]",
|
||||||
|
":red[ID -> Frame]",
|
||||||
":orange[Element Tree]",
|
":orange[Element Tree]",
|
||||||
":blue[Element Tree (Trimmed)]",
|
":blue[Element Tree (Trimmed)]",
|
||||||
":yellow[LLM Prompt]",
|
":yellow[LLM Prompt]",
|
||||||
@@ -422,6 +424,13 @@ with visualizer_tab:
|
|||||||
read_artifact_safe(uri),
|
read_artifact_safe(uri),
|
||||||
"No ID -> XPath map available.",
|
"No ID -> XPath map available.",
|
||||||
)
|
)
|
||||||
|
elif file_name.endswith("id_frame_map.json"):
|
||||||
|
streamlit_content_safe(
|
||||||
|
tab_id_to_frame,
|
||||||
|
tab_id_to_frame.json,
|
||||||
|
read_artifact_safe(uri),
|
||||||
|
"No ID -> Frame map available.",
|
||||||
|
)
|
||||||
elif file_name.endswith("tree.json"):
|
elif file_name.endswith("tree.json"):
|
||||||
streamlit_content_safe(
|
streamlit_content_safe(
|
||||||
tab_element_tree,
|
tab_element_tree,
|
||||||
|
|||||||
Reference in New Issue
Block a user