SDK: Prompt-based locator (#4027)

This commit is contained in:
Stanislav Novosad
2025-11-21 19:13:42 -07:00
committed by GitHub
parent 90f51bcacb
commit 8fb46ef1ca
19 changed files with 899 additions and 4 deletions

View File

@@ -224,9 +224,11 @@ if typing.TYPE_CHECKING:
HumanInteractionBlockParametersItem_Output,
HumanInteractionBlockParametersItem_Workflow,
HumanInteractionBlockYaml,
ImprovePromptResponse,
InputOrSelectContext,
InputTextAction,
InputTextActionData,
LocateElementAction,
LoginBlock,
LoginBlockDataSchema,
LoginBlockParametersItem,
@@ -276,6 +278,7 @@ if typing.TYPE_CHECKING:
RunSdkActionRequestAction_AiSelectOption,
RunSdkActionRequestAction_AiUploadFile,
RunSdkActionRequestAction_Extract,
RunSdkActionRequestAction_LocateElement,
RunSdkActionResponse,
RunStatus,
Script,
@@ -469,7 +472,7 @@ if typing.TYPE_CHECKING:
WorkflowStatus,
)
from .errors import BadRequestError, ForbiddenError, NotFoundError, UnprocessableEntityError
from . import browser_profiles, scripts, workflows
from . import browser_profiles, prompts, scripts, workflows
from .client import AsyncSkyvern, Skyvern
from .environment import SkyvernEnvironment
from .version import __version__
@@ -694,9 +697,11 @@ _dynamic_imports: typing.Dict[str, str] = {
"HumanInteractionBlockParametersItem_Output": ".types",
"HumanInteractionBlockParametersItem_Workflow": ".types",
"HumanInteractionBlockYaml": ".types",
"ImprovePromptResponse": ".types",
"InputOrSelectContext": ".types",
"InputTextAction": ".types",
"InputTextActionData": ".types",
"LocateElementAction": ".types",
"LoginBlock": ".types",
"LoginBlockDataSchema": ".types",
"LoginBlockParametersItem": ".types",
@@ -747,6 +752,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"RunSdkActionRequestAction_AiSelectOption": ".types",
"RunSdkActionRequestAction_AiUploadFile": ".types",
"RunSdkActionRequestAction_Extract": ".types",
"RunSdkActionRequestAction_LocateElement": ".types",
"RunSdkActionResponse": ".types",
"RunStatus": ".types",
"Script": ".types",
@@ -943,6 +949,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"WorkflowStatus": ".types",
"__version__": ".version",
"browser_profiles": ".browser_profiles",
"prompts": ".prompts",
"scripts": ".scripts",
"workflows": ".workflows",
}
@@ -1190,9 +1197,11 @@ __all__ = [
"HumanInteractionBlockParametersItem_Output",
"HumanInteractionBlockParametersItem_Workflow",
"HumanInteractionBlockYaml",
"ImprovePromptResponse",
"InputOrSelectContext",
"InputTextAction",
"InputTextActionData",
"LocateElementAction",
"LoginBlock",
"LoginBlockDataSchema",
"LoginBlockParametersItem",
@@ -1243,6 +1252,7 @@ __all__ = [
"RunSdkActionRequestAction_AiSelectOption",
"RunSdkActionRequestAction_AiUploadFile",
"RunSdkActionRequestAction_Extract",
"RunSdkActionRequestAction_LocateElement",
"RunSdkActionResponse",
"RunStatus",
"Script",
@@ -1439,6 +1449,7 @@ __all__ = [
"WorkflowStatus",
"__version__",
"browser_profiles",
"prompts",
"scripts",
"workflows",
]

View File

@@ -36,6 +36,7 @@ from .types.workflow_status import WorkflowStatus
if typing.TYPE_CHECKING:
from .browser_profiles.client import AsyncBrowserProfilesClient, BrowserProfilesClient
from .prompts.client import AsyncPromptsClient, PromptsClient
from .scripts.client import AsyncScriptsClient, ScriptsClient
from .workflows.client import AsyncWorkflowsClient, WorkflowsClient
# this is used as the default value for optional parameters
@@ -110,6 +111,7 @@ class Skyvern:
self._raw_client = RawSkyvern(client_wrapper=self._client_wrapper)
self._workflows: typing.Optional[WorkflowsClient] = None
self._browser_profiles: typing.Optional[BrowserProfilesClient] = None
self._prompts: typing.Optional[PromptsClient] = None
self._scripts: typing.Optional[ScriptsClient] = None
@property
@@ -1240,6 +1242,7 @@ class Skyvern:
totp_identifier: typing.Optional[str] = OMIT,
totp_url: typing.Optional[str] = OMIT,
browser_session_id: typing.Optional[str] = OMIT,
browser_profile_id: typing.Optional[str] = OMIT,
browser_address: typing.Optional[str] = OMIT,
extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT,
max_screenshot_scrolling_times: typing.Optional[int] = OMIT,
@@ -1283,6 +1286,9 @@ class Skyvern:
browser_session_id : typing.Optional[str]
ID of the browser session to use, which is prefixed by `pbs_` e.g. `pbs_123456`
browser_profile_id : typing.Optional[str]
ID of a browser profile to reuse for this run
browser_address : typing.Optional[str]
The CDP address for the task.
@@ -1347,6 +1353,7 @@ class Skyvern:
totp_identifier=totp_identifier,
totp_url=totp_url,
browser_session_id=browser_session_id,
browser_profile_id=browser_profile_id,
browser_address=browser_address,
extra_http_headers=extra_http_headers,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
@@ -1601,6 +1608,14 @@ class Skyvern:
self._browser_profiles = BrowserProfilesClient(client_wrapper=self._client_wrapper)
return self._browser_profiles
@property
def prompts(self):
    """
    Return the prompts sub-client, creating it on first access.

    The client is built from this instance's client wrapper the first time the
    property is read and stored on ``self._prompts``; later reads return the
    stored instance.
    """
    existing = self._prompts
    if existing is not None:
        return existing

    # Imported lazily, only when the sub-client is first needed.
    from .prompts.client import PromptsClient  # noqa: E402

    created = PromptsClient(client_wrapper=self._client_wrapper)
    self._prompts = created
    return created
@property
def scripts(self):
if self._scripts is None:
@@ -1678,6 +1693,7 @@ class AsyncSkyvern:
self._raw_client = AsyncRawSkyvern(client_wrapper=self._client_wrapper)
self._workflows: typing.Optional[AsyncWorkflowsClient] = None
self._browser_profiles: typing.Optional[AsyncBrowserProfilesClient] = None
self._prompts: typing.Optional[AsyncPromptsClient] = None
self._scripts: typing.Optional[AsyncScriptsClient] = None
@property
@@ -2982,6 +2998,7 @@ class AsyncSkyvern:
totp_identifier: typing.Optional[str] = OMIT,
totp_url: typing.Optional[str] = OMIT,
browser_session_id: typing.Optional[str] = OMIT,
browser_profile_id: typing.Optional[str] = OMIT,
browser_address: typing.Optional[str] = OMIT,
extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT,
max_screenshot_scrolling_times: typing.Optional[int] = OMIT,
@@ -3025,6 +3042,9 @@ class AsyncSkyvern:
browser_session_id : typing.Optional[str]
ID of the browser session to use, which is prefixed by `pbs_` e.g. `pbs_123456`
browser_profile_id : typing.Optional[str]
ID of a browser profile to reuse for this run
browser_address : typing.Optional[str]
The CDP address for the task.
@@ -3097,6 +3117,7 @@ class AsyncSkyvern:
totp_identifier=totp_identifier,
totp_url=totp_url,
browser_session_id=browser_session_id,
browser_profile_id=browser_profile_id,
browser_address=browser_address,
extra_http_headers=extra_http_headers,
max_screenshot_scrolling_times=max_screenshot_scrolling_times,
@@ -3391,6 +3412,14 @@ class AsyncSkyvern:
self._browser_profiles = AsyncBrowserProfilesClient(client_wrapper=self._client_wrapper)
return self._browser_profiles
@property
def prompts(self):
    """
    Return the async prompts sub-client, creating it on first access.

    The client is built from this instance's client wrapper the first time the
    property is read and stored on ``self._prompts``; later reads return the
    stored instance.
    """
    existing = self._prompts
    if existing is not None:
        return existing

    # Imported lazily, only when the sub-client is first needed.
    from .prompts.client import AsyncPromptsClient  # noqa: E402

    created = AsyncPromptsClient(client_wrapper=self._client_wrapper)
    self._prompts = created
    return created
@property
def scripts(self):
if self._scripts is None:

View File

@@ -0,0 +1,4 @@
# This file was auto-generated by Fern from our API Definition.
# isort: skip_file

View File

@@ -0,0 +1,145 @@
# This file was auto-generated by Fern from our API Definition.
import typing
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.request_options import RequestOptions
from ..types.improve_prompt_response import ImprovePromptResponse
from .raw_client import AsyncRawPromptsClient, RawPromptsClient
# Sentinel used as the default value for optional parameters: Ellipsis cast to
# typing.Any so it can stand in for any annotated type. Because it is not None,
# an argument the caller never supplied stays distinguishable from an explicit None.
OMIT = typing.cast(typing.Any, ...)
class PromptsClient:
    """
    Synchronous client for the prompts endpoints.

    Each call is delegated to a ``RawPromptsClient``; this wrapper hands back only
    the ``data`` attribute of the raw response.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        # The raw client performs the actual HTTP request and response parsing.
        self._raw_client = RawPromptsClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawPromptsClient:
        """
        Access the underlying raw client, whose methods return raw responses.

        Returns
        -------
        RawPromptsClient
        """
        return self._raw_client

    def improve_prompt(
        self,
        *,
        use_case: str,
        prompt: str,
        context: typing.Optional[str] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> ImprovePromptResponse:
        """
        Improve a prompt based on a specific use-case.

        Parameters
        ----------
        use_case : str
            The use-case for prompt improvement

        prompt : str
            The original prompt to improve

        context : typing.Optional[str]
            Additional context about the user's needs

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        ImprovePromptResponse
            Successful Response

        Examples
        --------
        from skyvern import Skyvern

        client = Skyvern(
            api_key="YOUR_API_KEY",
        )
        client.prompts.improve_prompt(
            use_case="use-case",
            prompt="prompt",
        )
        """
        raw_result = self._raw_client.improve_prompt(
            use_case=use_case,
            prompt=prompt,
            context=context,
            request_options=request_options,
        )
        # Callers of this client only want the parsed payload, not the HTTP envelope.
        return raw_result.data
class AsyncPromptsClient:
    """
    Asynchronous client for the prompts endpoints.

    Each call is delegated to an ``AsyncRawPromptsClient``; this wrapper hands back
    only the ``data`` attribute of the raw response.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        # The raw client performs the actual HTTP request and response parsing.
        self._raw_client = AsyncRawPromptsClient(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawPromptsClient:
        """
        Access the underlying raw client, whose methods return raw responses.

        Returns
        -------
        AsyncRawPromptsClient
        """
        return self._raw_client

    async def improve_prompt(
        self,
        *,
        use_case: str,
        prompt: str,
        context: typing.Optional[str] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> ImprovePromptResponse:
        """
        Improve a prompt based on a specific use-case.

        Parameters
        ----------
        use_case : str
            The use-case for prompt improvement

        prompt : str
            The original prompt to improve

        context : typing.Optional[str]
            Additional context about the user's needs

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        ImprovePromptResponse
            Successful Response

        Examples
        --------
        import asyncio

        from skyvern import AsyncSkyvern

        client = AsyncSkyvern(
            api_key="YOUR_API_KEY",
        )


        async def main() -> None:
            await client.prompts.improve_prompt(
                use_case="use-case",
                prompt="prompt",
            )


        asyncio.run(main())
        """
        raw_result = await self._raw_client.improve_prompt(
            use_case=use_case,
            prompt=prompt,
            context=context,
            request_options=request_options,
        )
        # Callers of this client only want the parsed payload, not the HTTP envelope.
        return raw_result.data

View File

@@ -0,0 +1,169 @@
# This file was auto-generated by Fern from our API Definition.
import typing
from json.decoder import JSONDecodeError
from ..core.api_error import ApiError
from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.http_response import AsyncHttpResponse, HttpResponse
from ..core.pydantic_utilities import parse_obj_as
from ..core.request_options import RequestOptions
from ..errors.unprocessable_entity_error import UnprocessableEntityError
from ..types.improve_prompt_response import ImprovePromptResponse
# Sentinel used as the default value for optional parameters: Ellipsis cast to
# typing.Any so it can stand in for any annotated type. It is also handed to the
# HTTP client as `omit=OMIT` — presumably so fields still equal to the sentinel are
# left out of the request; confirm against the core HTTP client.
OMIT = typing.cast(typing.Any, ...)
class RawPromptsClient:
    """
    Synchronous low-level client for the prompts endpoints.

    Methods return an ``HttpResponse`` carrying both the HTTP response and the
    parsed payload.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    def improve_prompt(
        self,
        *,
        use_case: str,
        prompt: str,
        context: typing.Optional[str] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[ImprovePromptResponse]:
        """
        Improve a prompt based on a specific use-case.

        Sends a POST to ``v1/prompts/improve`` with ``use_case`` as the ``use-case``
        query parameter and ``prompt`` / ``context`` in the JSON body.

        Parameters
        ----------
        use_case : str
            The use-case for prompt improvement

        prompt : str
            The original prompt to improve

        context : typing.Optional[str]
            Additional context about the user's needs

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[ImprovePromptResponse]
            Successful Response

        Raises
        ------
        UnprocessableEntityError
            If the server answers with status 422.

        ApiError
            For any other non-2xx status, or when the response body is not valid JSON.
        """
        query_params = {
            "use-case": use_case,
        }
        payload = {
            "context": context,
            "prompt": prompt,
        }
        request_headers = {
            "content-type": "application/json",
        }
        http_response = self._client_wrapper.httpx_client.request(
            "v1/prompts/improve",
            method="POST",
            params=query_params,
            json=payload,
            headers=request_headers,
            request_options=request_options,
            omit=OMIT,
        )
        status = http_response.status_code
        try:
            if 200 <= status < 300:
                parsed = typing.cast(
                    ImprovePromptResponse,
                    parse_obj_as(
                        type_=ImprovePromptResponse,  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                return HttpResponse(response=http_response, data=parsed)
            if status == 422:
                validation_body = typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                raise UnprocessableEntityError(
                    headers=dict(http_response.headers),
                    body=validation_body,
                )
            error_body = http_response.json()
        except JSONDecodeError:
            # Body was not JSON: surface the raw text instead.
            raise ApiError(status_code=status, headers=dict(http_response.headers), body=http_response.text)
        # Unhandled status with a JSON body.
        raise ApiError(status_code=status, headers=dict(http_response.headers), body=error_body)
class AsyncRawPromptsClient:
    """
    Asynchronous low-level client for the prompts endpoints.

    Methods return an ``AsyncHttpResponse`` carrying both the HTTP response and the
    parsed payload.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._client_wrapper = client_wrapper

    async def improve_prompt(
        self,
        *,
        use_case: str,
        prompt: str,
        context: typing.Optional[str] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[ImprovePromptResponse]:
        """
        Improve a prompt based on a specific use-case.

        Sends a POST to ``v1/prompts/improve`` with ``use_case`` as the ``use-case``
        query parameter and ``prompt`` / ``context`` in the JSON body.

        Parameters
        ----------
        use_case : str
            The use-case for prompt improvement

        prompt : str
            The original prompt to improve

        context : typing.Optional[str]
            Additional context about the user's needs

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[ImprovePromptResponse]
            Successful Response

        Raises
        ------
        UnprocessableEntityError
            If the server answers with status 422.

        ApiError
            For any other non-2xx status, or when the response body is not valid JSON.
        """
        query_params = {
            "use-case": use_case,
        }
        payload = {
            "context": context,
            "prompt": prompt,
        }
        request_headers = {
            "content-type": "application/json",
        }
        http_response = await self._client_wrapper.httpx_client.request(
            "v1/prompts/improve",
            method="POST",
            params=query_params,
            json=payload,
            headers=request_headers,
            request_options=request_options,
            omit=OMIT,
        )
        status = http_response.status_code
        try:
            if 200 <= status < 300:
                parsed = typing.cast(
                    ImprovePromptResponse,
                    parse_obj_as(
                        type_=ImprovePromptResponse,  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                return AsyncHttpResponse(response=http_response, data=parsed)
            if status == 422:
                validation_body = typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                raise UnprocessableEntityError(
                    headers=dict(http_response.headers),
                    body=validation_body,
                )
            error_body = http_response.json()
        except JSONDecodeError:
            # Body was not JSON: surface the raw text instead.
            raise ApiError(status_code=status, headers=dict(http_response.headers), body=http_response.text)
        # Unhandled status with a JSON body.
        raise ApiError(status_code=status, headers=dict(http_response.headers), body=error_body)

View File

@@ -1683,6 +1683,7 @@ class RawSkyvern:
totp_identifier: typing.Optional[str] = OMIT,
totp_url: typing.Optional[str] = OMIT,
browser_session_id: typing.Optional[str] = OMIT,
browser_profile_id: typing.Optional[str] = OMIT,
browser_address: typing.Optional[str] = OMIT,
extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT,
max_screenshot_scrolling_times: typing.Optional[int] = OMIT,
@@ -1726,6 +1727,9 @@ class RawSkyvern:
browser_session_id : typing.Optional[str]
ID of the browser session to use, which is prefixed by `pbs_` e.g. `pbs_123456`
browser_profile_id : typing.Optional[str]
ID of a browser profile to reuse for this run
browser_address : typing.Optional[str]
The CDP address for the task.
@@ -1782,6 +1786,7 @@ class RawSkyvern:
"totp_identifier": totp_identifier,
"totp_url": totp_url,
"browser_session_id": browser_session_id,
"browser_profile_id": browser_profile_id,
"browser_address": browser_address,
"extra_http_headers": extra_http_headers,
"max_screenshot_scrolling_times": max_screenshot_scrolling_times,
@@ -3799,6 +3804,7 @@ class AsyncRawSkyvern:
totp_identifier: typing.Optional[str] = OMIT,
totp_url: typing.Optional[str] = OMIT,
browser_session_id: typing.Optional[str] = OMIT,
browser_profile_id: typing.Optional[str] = OMIT,
browser_address: typing.Optional[str] = OMIT,
extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT,
max_screenshot_scrolling_times: typing.Optional[int] = OMIT,
@@ -3842,6 +3848,9 @@ class AsyncRawSkyvern:
browser_session_id : typing.Optional[str]
ID of the browser session to use, which is prefixed by `pbs_` e.g. `pbs_123456`
browser_profile_id : typing.Optional[str]
ID of a browser profile to reuse for this run
browser_address : typing.Optional[str]
The CDP address for the task.
@@ -3898,6 +3907,7 @@ class AsyncRawSkyvern:
"totp_identifier": totp_identifier,
"totp_url": totp_url,
"browser_session_id": browser_session_id,
"browser_profile_id": browser_profile_id,
"browser_address": browser_address,
"extra_http_headers": extra_http_headers,
"max_screenshot_scrolling_times": max_screenshot_scrolling_times,

View File

@@ -245,9 +245,11 @@ if typing.TYPE_CHECKING:
HumanInteractionBlockParametersItem_Workflow,
)
from .human_interaction_block_yaml import HumanInteractionBlockYaml
from .improve_prompt_response import ImprovePromptResponse
from .input_or_select_context import InputOrSelectContext
from .input_text_action import InputTextAction
from .input_text_action_data import InputTextActionData
from .locate_element_action import LocateElementAction
from .login_block import LoginBlock
from .login_block_data_schema import LoginBlockDataSchema
from .login_block_parameters_item import (
@@ -302,6 +304,7 @@ if typing.TYPE_CHECKING:
RunSdkActionRequestAction_AiSelectOption,
RunSdkActionRequestAction_AiUploadFile,
RunSdkActionRequestAction_Extract,
RunSdkActionRequestAction_LocateElement,
)
from .run_sdk_action_response import RunSdkActionResponse
from .run_status import RunStatus
@@ -730,9 +733,11 @@ _dynamic_imports: typing.Dict[str, str] = {
"HumanInteractionBlockParametersItem_Output": ".human_interaction_block_parameters_item",
"HumanInteractionBlockParametersItem_Workflow": ".human_interaction_block_parameters_item",
"HumanInteractionBlockYaml": ".human_interaction_block_yaml",
"ImprovePromptResponse": ".improve_prompt_response",
"InputOrSelectContext": ".input_or_select_context",
"InputTextAction": ".input_text_action",
"InputTextActionData": ".input_text_action_data",
"LocateElementAction": ".locate_element_action",
"LoginBlock": ".login_block",
"LoginBlockDataSchema": ".login_block_data_schema",
"LoginBlockParametersItem": ".login_block_parameters_item",
@@ -782,6 +787,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"RunSdkActionRequestAction_AiSelectOption": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_AiUploadFile": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_Extract": ".run_sdk_action_request_action",
"RunSdkActionRequestAction_LocateElement": ".run_sdk_action_request_action",
"RunSdkActionResponse": ".run_sdk_action_response",
"RunStatus": ".run_status",
"Script": ".script",
@@ -1215,9 +1221,11 @@ __all__ = [
"HumanInteractionBlockParametersItem_Output",
"HumanInteractionBlockParametersItem_Workflow",
"HumanInteractionBlockYaml",
"ImprovePromptResponse",
"InputOrSelectContext",
"InputTextAction",
"InputTextActionData",
"LocateElementAction",
"LoginBlock",
"LoginBlockDataSchema",
"LoginBlockParametersItem",
@@ -1267,6 +1275,7 @@ __all__ = [
"RunSdkActionRequestAction_AiSelectOption",
"RunSdkActionRequestAction_AiUploadFile",
"RunSdkActionRequestAction_Extract",
"RunSdkActionRequestAction_LocateElement",
"RunSdkActionResponse",
"RunStatus",
"Script",

View File

@@ -0,0 +1,32 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
class ImprovePromptResponse(UniversalBaseModel):
    # Response model for the prompt-improvement endpoint: carries the original
    # prompt, the improved prompt, and an optional error message.
    error: typing.Optional[str] = pydantic.Field(default=None)
    """
    Error message if prompt improvement failed
    """

    improved: str = pydantic.Field()
    """
    The improved version of the prompt
    """

    original: str = pydantic.Field()
    """
    The original prompt provided for improvement
    """

    # Both branches configure a frozen model that allows extra fields; only the
    # spelling differs between pydantic major versions.
    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -0,0 +1,26 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
class LocateElementAction(UniversalBaseModel):
    """
    Locate element action parameters.
    """

    prompt: typing.Optional[str] = pydantic.Field(default=None)
    """
    Natural language prompt to locate an element
    """

    # Both branches configure a frozen model that allows extra fields; only the
    # spelling differs between pydantic major versions.
    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -145,6 +145,24 @@ class RunSdkActionRequestAction_Extract(UniversalBaseModel):
extra = pydantic.Extra.allow
class RunSdkActionRequestAction_LocateElement(UniversalBaseModel):
    """
    The action to execute with its specific parameters
    """

    # Fixed tag identifying this variant; it can only be "locate_element".
    type: typing.Literal["locate_element"] = "locate_element"
    # Optional prompt for the locate-element action; defaults to None.
    prompt: typing.Optional[str] = None

    # Both branches configure a frozen model that allows extra fields; only the
    # spelling differs between pydantic major versions.
    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
RunSdkActionRequestAction = typing.Union[
RunSdkActionRequestAction_AiAct,
RunSdkActionRequestAction_AiClick,
@@ -152,4 +170,5 @@ RunSdkActionRequestAction = typing.Union[
RunSdkActionRequestAction_AiSelectOption,
RunSdkActionRequestAction_AiUploadFile,
RunSdkActionRequestAction_Extract,
RunSdkActionRequestAction_LocateElement,
]