make browser download timeout configurable for blocks and tasks (#3619)
This commit is contained in:
@@ -0,0 +1,31 @@
|
|||||||
|
"""add download_timeout to tasks table
|
||||||
|
|
||||||
|
Revision ID: 1ab477ef80e4
|
||||||
|
Revises: cce87185dbb5
|
||||||
|
Create Date: 2025-10-06 15:04:12.103789+00:00
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "1ab477ef80e4"
|
||||||
|
down_revision: Union[str, None] = "cce87185dbb5"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Apply the migration: add a nullable ``download_timeout`` column to ``tasks``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Nullable, so existing rows need no backfill.
    timeout_column = sa.Column("download_timeout", sa.Numeric(), nullable=True)
    op.add_column("tasks", timeout_column)
    # ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert the migration: drop the ``download_timeout`` column from ``tasks``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name, column_name = "tasks", "download_timeout"
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
|
||||||
@@ -18,6 +18,7 @@ from playwright.async_api import Page
|
|||||||
from skyvern import analytics
|
from skyvern import analytics
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.constants import (
|
from skyvern.constants import (
|
||||||
|
BROWSER_DOWNLOAD_TIMEOUT,
|
||||||
BROWSER_DOWNLOADING_SUFFIX,
|
BROWSER_DOWNLOADING_SUFFIX,
|
||||||
DEFAULT_MAX_SCREENSHOT_SCROLLS,
|
DEFAULT_MAX_SCREENSHOT_SCROLLS,
|
||||||
GET_DOWNLOADED_FILES_TIMEOUT,
|
GET_DOWNLOADED_FILES_TIMEOUT,
|
||||||
@@ -199,6 +200,7 @@ class ForgeAgent:
|
|||||||
extra_http_headers=workflow_run.extra_http_headers,
|
extra_http_headers=workflow_run.extra_http_headers,
|
||||||
browser_address=workflow_run.browser_address,
|
browser_address=workflow_run.browser_address,
|
||||||
browser_session_id=workflow_run.browser_session_id,
|
browser_session_id=workflow_run.browser_session_id,
|
||||||
|
download_timeout=task_block.download_timeout,
|
||||||
)
|
)
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Created a new task for workflow run",
|
"Created a new task for workflow run",
|
||||||
@@ -217,6 +219,7 @@ class ForgeAgent:
|
|||||||
organization_id=task.organization_id,
|
organization_id=task.organization_id,
|
||||||
status=TaskStatus.running,
|
status=TaskStatus.running,
|
||||||
)
|
)
|
||||||
|
|
||||||
step = await app.DATABASE.create_step(
|
step = await app.DATABASE.create_step(
|
||||||
task.task_id,
|
task.task_id,
|
||||||
order=0,
|
order=0,
|
||||||
@@ -500,7 +503,10 @@ class ForgeAgent:
|
|||||||
step_id=step.step_id,
|
step_id=step.step_id,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await wait_for_download_finished(downloading_files=downloading_files)
|
await wait_for_download_finished(
|
||||||
|
downloading_files=downloading_files,
|
||||||
|
timeout=task_block.download_timeout or BROWSER_DOWNLOAD_TIMEOUT,
|
||||||
|
)
|
||||||
except DownloadFileMaxWaitingTime as e:
|
except DownloadFileMaxWaitingTime as e:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
"There're several long-time downloading files, these files might be broken",
|
"There're several long-time downloading files, these files might be broken",
|
||||||
|
|||||||
@@ -172,6 +172,7 @@ class AgentDB:
|
|||||||
extra_http_headers: dict[str, str] | None = None,
|
extra_http_headers: dict[str, str] | None = None,
|
||||||
browser_session_id: str | None = None,
|
browser_session_id: str | None = None,
|
||||||
browser_address: str | None = None,
|
browser_address: str | None = None,
|
||||||
|
download_timeout: float | None = None,
|
||||||
) -> Task:
|
) -> Task:
|
||||||
try:
|
try:
|
||||||
async with self.Session() as session:
|
async with self.Session() as session:
|
||||||
@@ -203,6 +204,7 @@ class AgentDB:
|
|||||||
extra_http_headers=extra_http_headers,
|
extra_http_headers=extra_http_headers,
|
||||||
browser_session_id=browser_session_id,
|
browser_session_id=browser_session_id,
|
||||||
browser_address=browser_address,
|
browser_address=browser_address,
|
||||||
|
download_timeout=download_timeout,
|
||||||
)
|
)
|
||||||
session.add(new_task)
|
session.add(new_task)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|||||||
@@ -109,6 +109,7 @@ class TaskModel(Base):
|
|||||||
)
|
)
|
||||||
model = Column(JSON, nullable=True)
|
model = Column(JSON, nullable=True)
|
||||||
browser_address = Column(String, nullable=True)
|
browser_address = Column(String, nullable=True)
|
||||||
|
download_timeout = Column(Numeric, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
class StepModel(Base):
|
class StepModel(Base):
|
||||||
|
|||||||
@@ -158,6 +158,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p
|
|||||||
max_screenshot_scrolls=task_obj.max_screenshot_scrolling_times,
|
max_screenshot_scrolls=task_obj.max_screenshot_scrolling_times,
|
||||||
browser_session_id=task_obj.browser_session_id,
|
browser_session_id=task_obj.browser_session_id,
|
||||||
browser_address=task_obj.browser_address,
|
browser_address=task_obj.browser_address,
|
||||||
|
download_timeout=task_obj.download_timeout,
|
||||||
)
|
)
|
||||||
return task
|
return task
|
||||||
|
|
||||||
|
|||||||
@@ -113,6 +113,11 @@ class TaskBase(BaseModel):
|
|||||||
description="The CDP address for the task.",
|
description="The CDP address for the task.",
|
||||||
examples=["http://127.0.0.1:9222", "ws://127.0.0.1:9222/devtools/browser/1234567890"],
|
examples=["http://127.0.0.1:9222", "ws://127.0.0.1:9222/devtools/browser/1234567890"],
|
||||||
)
|
)
|
||||||
|
download_timeout: float | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="The maximum time to wait for downloads to complete, in seconds. If not set, defaults to BROWSER_DOWNLOAD_TIMEOUT seconds.",
|
||||||
|
examples=[15.0],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TaskRequest(TaskBase):
|
class TaskRequest(TaskBase):
|
||||||
|
|||||||
@@ -422,6 +422,7 @@ class BaseTaskBlock(Block):
|
|||||||
cache_actions: bool = False
|
cache_actions: bool = False
|
||||||
complete_verification: bool = True
|
complete_verification: bool = True
|
||||||
include_action_history_in_verification: bool = False
|
include_action_history_in_verification: bool = False
|
||||||
|
download_timeout: float | None = None # seconds
|
||||||
|
|
||||||
def get_all_parameters(
|
def get_all_parameters(
|
||||||
self,
|
self,
|
||||||
@@ -631,6 +632,7 @@ class BaseTaskBlock(Block):
|
|||||||
failure_reason=str(e),
|
failure_reason=str(e),
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# add screenshot artifact for the first task
|
# add screenshot artifact for the first task
|
||||||
screenshot = await browser_state.take_fullpage_screenshot(
|
screenshot = await browser_state.take_fullpage_screenshot(
|
||||||
|
|||||||
@@ -2490,6 +2490,7 @@ class WorkflowService:
|
|||||||
cache_actions=block_yaml.cache_actions,
|
cache_actions=block_yaml.cache_actions,
|
||||||
complete_on_download=True,
|
complete_on_download=True,
|
||||||
complete_verification=True,
|
complete_verification=True,
|
||||||
|
download_timeout=block_yaml.download_timeout,
|
||||||
)
|
)
|
||||||
elif block_yaml.block_type == BlockType.TaskV2:
|
elif block_yaml.block_type == BlockType.TaskV2:
|
||||||
return TaskV2Block(
|
return TaskV2Block(
|
||||||
|
|||||||
@@ -433,6 +433,7 @@ class FileDownloadBlockYAML(BlockYAML):
|
|||||||
totp_verification_url: str | None = None
|
totp_verification_url: str | None = None
|
||||||
totp_identifier: str | None = None
|
totp_identifier: str | None = None
|
||||||
cache_actions: bool = False
|
cache_actions: bool = False
|
||||||
|
download_timeout: float | None = None
|
||||||
|
|
||||||
|
|
||||||
class UrlBlockYAML(BlockYAML):
|
class UrlBlockYAML(BlockYAML):
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from skyvern.config import settings
|
|||||||
from skyvern.constants import (
|
from skyvern.constants import (
|
||||||
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
|
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
|
||||||
BROWSER_DOWNLOAD_MAX_WAIT_TIME,
|
BROWSER_DOWNLOAD_MAX_WAIT_TIME,
|
||||||
|
BROWSER_DOWNLOAD_TIMEOUT,
|
||||||
DROPDOWN_MENU_MAX_DISTANCE,
|
DROPDOWN_MENU_MAX_DISTANCE,
|
||||||
REPO_ROOT_DIR,
|
REPO_ROOT_DIR,
|
||||||
SKYVERN_ID_ATTR,
|
SKYVERN_ID_ATTR,
|
||||||
@@ -864,7 +865,7 @@ async def handle_click_to_download_file_action(
|
|||||||
"Checking if there is any new files after click",
|
"Checking if there is any new files after click",
|
||||||
download_dir=download_dir,
|
download_dir=download_dir,
|
||||||
)
|
)
|
||||||
async with asyncio.timeout(BROWSER_DOWNLOAD_MAX_WAIT_TIME):
|
async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
|
||||||
while True:
|
while True:
|
||||||
list_files_after = list_files_in_directory(download_dir)
|
list_files_after = list_files_in_directory(download_dir)
|
||||||
if task.browser_session_id:
|
if task.browser_session_id:
|
||||||
@@ -913,7 +914,9 @@ async def handle_click_to_download_file_action(
|
|||||||
workflow_run_id=task.workflow_run_id,
|
workflow_run_id=task.workflow_run_id,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await wait_for_download_finished(downloading_files=downloading_files)
|
await wait_for_download_finished(
|
||||||
|
downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
|
||||||
|
)
|
||||||
except DownloadFileMaxWaitingTime as e:
|
except DownloadFileMaxWaitingTime as e:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
"There're several long-time downloading files, these files might be broken",
|
"There're several long-time downloading files, these files might be broken",
|
||||||
|
|||||||
@@ -70,12 +70,14 @@ def set_browser_console_log(browser_context: BrowserContext, browser_artifacts:
|
|||||||
browser_context.on("console", browser_console_log)
|
browser_context.on("console", browser_console_log)
|
||||||
|
|
||||||
|
|
||||||
def set_download_file_listener(browser_context: BrowserContext, **kwargs: Any) -> None:
|
def set_download_file_listener(
|
||||||
|
browser_context: BrowserContext, download_timeout: float | None = None, **kwargs: Any
|
||||||
|
) -> None:
|
||||||
async def listen_to_download(download: Download) -> None:
|
async def listen_to_download(download: Download) -> None:
|
||||||
workflow_run_id = kwargs.get("workflow_run_id")
|
workflow_run_id = kwargs.get("workflow_run_id")
|
||||||
task_id = kwargs.get("task_id")
|
task_id = kwargs.get("task_id")
|
||||||
try:
|
try:
|
||||||
async with asyncio.timeout(BROWSER_DOWNLOAD_TIMEOUT):
|
async with asyncio.timeout(download_timeout or BROWSER_DOWNLOAD_TIMEOUT):
|
||||||
file_path = await download.path()
|
file_path = await download.path()
|
||||||
if file_path.suffix:
|
if file_path.suffix:
|
||||||
return
|
return
|
||||||
|
|||||||
Reference in New Issue
Block a user