Browser recording: events to blocks (#4195)
This commit is contained in:
@@ -32,6 +32,7 @@ from skyvern.forge.sdk.services.credential.custom_credential_vault_service impor
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
from skyvern.forge.sdk.workflow.context_manager import WorkflowContextManager
|
||||
from skyvern.forge.sdk.workflow.service import WorkflowService
|
||||
from skyvern.services.browser_recording.service import BrowserSessionRecordingService
|
||||
from skyvern.webeye.browser_manager import BrowserManager
|
||||
from skyvern.webeye.persistent_sessions_manager import PersistentSessionsManager
|
||||
from skyvern.webeye.real_browser_manager import RealBrowserManager
|
||||
@@ -69,6 +70,7 @@ class ForgeApp:
|
||||
WORKFLOW_SERVICE: WorkflowService
|
||||
AGENT_FUNCTION: AgentFunction
|
||||
PERSISTENT_SESSIONS_MANAGER: PersistentSessionsManager
|
||||
BROWSER_SESSION_RECORDING_SERVICE: BrowserSessionRecordingService
|
||||
BITWARDEN_CREDENTIAL_VAULT_SERVICE: BitwardenCredentialVaultService
|
||||
AZURE_CREDENTIAL_VAULT_SERVICE: AzureCredentialVaultService | None
|
||||
CUSTOM_CREDENTIAL_VAULT_SERVICE: CustomCredentialVaultService | None
|
||||
@@ -177,6 +179,7 @@ def create_forge_app() -> ForgeApp:
|
||||
app.WORKFLOW_SERVICE = WorkflowService()
|
||||
app.AGENT_FUNCTION = AgentFunction()
|
||||
app.PERSISTENT_SESSIONS_MANAGER = PersistentSessionsManager(database=app.DATABASE)
|
||||
app.BROWSER_SESSION_RECORDING_SERVICE = BrowserSessionRecordingService()
|
||||
|
||||
app.AZURE_CLIENT_FACTORY = RealAzureClientFactory()
|
||||
app.BITWARDEN_CREDENTIAL_VAULT_SERVICE = BitwardenCredentialVaultService()
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
Given a browser action, come up with a templated one-line prompt suitable for a browser agent, a block label, and a title.
|
||||
|
||||
The templated prompt should have one jinja variable in it. Come up with a good name for the variable that is
|
||||
lower case, no spaces, underscores permitted.
|
||||
|
||||
Example: "Enter {{ address }} into the address field."
|
||||
|
||||
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||
|
||||
Reply in JSON format with the following keys:
|
||||
{
|
||||
"prompt": str, // A templated, one-line prompt suitable for a browser agent describing the user action.
|
||||
"title": str, // A descriptive and informative title for the goal. Use no more than 5 words
|
||||
"block_label": str, // A label for the block. Lower case. Based off of the "title". Underscores are permitted.
|
||||
"parameter_name": { "key": str } // The name of the parameter being input. Lower case, no spaces. Underscores are permitted.
|
||||
}
|
||||
|
||||
User action:
|
||||
```
|
||||
{{ action }}
|
||||
```
|
||||
@@ -0,0 +1,15 @@
|
||||
Given a browser action, come up with a one-line prompt suitable for a browser agent, a block label, and a title.
|
||||
|
||||
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||
|
||||
Reply in JSON format with the following keys:
|
||||
{
|
||||
"prompt": str, // A one-line prompt suitable for a browser agent describing the user action.
|
||||
"title": str, // A descriptive and informative title for the goal. Use no more than 5 words
|
||||
"block_label": str // A label for the block. Lower case. Based off of the "title". Underscores are permitted.
|
||||
}
|
||||
|
||||
User action:
|
||||
```
|
||||
{{ action }}
|
||||
```
|
||||
@@ -0,0 +1,13 @@
|
||||
Given a "go to URL" action, come up with a block label suitable for a browser agent. It should include a short version of a url, if the url exists.
|
||||
|
||||
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||
|
||||
Reply in JSON format with the following keys:
|
||||
{
|
||||
"block_label": str // A label for the block. Lower case. Underscores are permitted.
|
||||
}
|
||||
|
||||
go to URL action:
|
||||
```
|
||||
{{ action }}
|
||||
```
|
||||
15
skyvern/forge/prompts/skyvern/recording-wait-block-prompt.j2
Normal file
15
skyvern/forge/prompts/skyvern/recording-wait-block-prompt.j2
Normal file
@@ -0,0 +1,15 @@
|
||||
Given a wait action, come up with a block label. It should include the duration in seconds, and a short version of a url, if the url exists.
|
||||
|
||||
If the duration for the wait action is in milliseconds, so be sure to convert to seconds.
|
||||
|
||||
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||
|
||||
Reply in JSON format with the following keys:
|
||||
{
|
||||
"block_label": str // A label for the block. Lower case. Underscores are permitted.
|
||||
}
|
||||
|
||||
Wait action:
|
||||
```
|
||||
{{ action }}
|
||||
```
|
||||
@@ -18,7 +18,11 @@ from skyvern.forge.sdk.routes.code_samples import (
|
||||
from skyvern.forge.sdk.routes.routers import base_router
|
||||
from skyvern.forge.sdk.schemas.organizations import Organization
|
||||
from skyvern.forge.sdk.services import org_auth_service
|
||||
from skyvern.schemas.browser_sessions import CreateBrowserSessionRequest
|
||||
from skyvern.schemas.browser_sessions import (
|
||||
CreateBrowserSessionRequest,
|
||||
ProcessBrowserSessionRecordingRequest,
|
||||
ProcessBrowserSessionRecordingResponse,
|
||||
)
|
||||
from skyvern.webeye.schemas import BrowserSessionResponse
|
||||
|
||||
|
||||
@@ -217,3 +221,29 @@ async def get_browser_sessions(
|
||||
for browser_session in browser_sessions
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@base_router.post(
|
||||
"/browser_sessions/{browser_session_id}/process_recording",
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def process_recording(
|
||||
browser_session_id: str = Path(..., description="The ID of the browser session.", examples=["pbs_123456"]),
|
||||
recording_request: ProcessBrowserSessionRecordingRequest = ProcessBrowserSessionRecordingRequest(),
|
||||
current_org: Organization = Depends(org_auth_service.get_current_org),
|
||||
) -> ProcessBrowserSessionRecordingResponse:
|
||||
browser_session = await app.PERSISTENT_SESSIONS_MANAGER.get_session(
|
||||
browser_session_id,
|
||||
current_org.organization_id,
|
||||
)
|
||||
if not browser_session:
|
||||
raise HTTPException(status_code=404, detail=f"Browser session {browser_session_id} not found")
|
||||
|
||||
blocks, parameters = await app.BROWSER_SESSION_RECORDING_SERVICE.process_recording(
|
||||
organization_id=current_org.organization_id,
|
||||
browser_session_id=browser_session_id,
|
||||
compressed_chunks=recording_request.compressed_chunks,
|
||||
workflow_permanent_id=recording_request.workflow_permanent_id,
|
||||
)
|
||||
|
||||
return ProcessBrowserSessionRecordingResponse(blocks=blocks, parameters=parameters)
|
||||
|
||||
@@ -14,6 +14,7 @@ import asyncio
|
||||
import dataclasses
|
||||
import enum
|
||||
import json
|
||||
import time
|
||||
import typing as t
|
||||
|
||||
import structlog
|
||||
@@ -39,6 +40,7 @@ class ExfiltratedEvent:
|
||||
# TODO(jdo): improve typing for params
|
||||
params: dict = dataclasses.field(default_factory=dict)
|
||||
source: ExfiltratedEventSource = ExfiltratedEventSource.NOT_SPECIFIED
|
||||
timestamp: float = dataclasses.field(default_factory=lambda: time.time()) # seconds since epoch
|
||||
|
||||
|
||||
OnExfiltrationEvent = t.Callable[[list[ExfiltratedEvent]], None]
|
||||
@@ -68,6 +70,7 @@ class ExfiltrationChannel(CdpChannel):
|
||||
event_name="user_interaction",
|
||||
params=event_data,
|
||||
source=ExfiltratedEventSource.CONSOLE,
|
||||
timestamp=time.time(),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -84,20 +87,25 @@ class ExfiltrationChannel(CdpChannel):
|
||||
event_name=event_name,
|
||||
params=params,
|
||||
source=ExfiltratedEventSource.CDP,
|
||||
timestamp=time.time(),
|
||||
),
|
||||
]
|
||||
|
||||
self.on_event(messages)
|
||||
|
||||
if event_name in ("frame_navigated", "navigated_within_document"):
|
||||
# optimistically re-apply exfiltration and decoration on navigation
|
||||
# (these operations should be idempotent)
|
||||
pages = self.browser_context.pages if self.browser_context else []
|
||||
LOG.info(f"{self.class_name} re-applying exfiltration and decoration on navigation.", event_name=event_name)
|
||||
async def adorn(self, page: Page) -> t.Self:
|
||||
"""Add a mouse-following follower to the page."""
|
||||
if page.url.startswith("devtools:"):
|
||||
return self
|
||||
|
||||
for page in pages:
|
||||
asyncio.create_task(self.exfiltrate(page))
|
||||
asyncio.create_task(self.decorate(page))
|
||||
LOG.info(f"{self.class_name} adorning page.", url=page.url)
|
||||
|
||||
(await page.evaluate(self.js("adorn")),)
|
||||
(await page.add_init_script(self.js("adorn")),)
|
||||
|
||||
LOG.info(f"{self.class_name} adornment complete on page.", url=page.url)
|
||||
|
||||
return self
|
||||
|
||||
async def connect(self, cdp_url: str | None = None) -> t.Self:
|
||||
if self.browser and self.browser.is_connected() and self.cdp_session:
|
||||
@@ -121,12 +129,18 @@ class ExfiltrationChannel(CdpChannel):
|
||||
async def exfiltrate(self, page: Page) -> t.Self:
|
||||
"""
|
||||
Track user interactions and send to console for CDP to capture.
|
||||
|
||||
Uses add_init_script to ensure the exfiltration script is re-injected
|
||||
on every navigation (including address bar navigations).
|
||||
"""
|
||||
if page.url.startswith("devtools:"):
|
||||
return self
|
||||
|
||||
LOG.info(f"{self.class_name} setting up exfiltration on new page.", url=page.url)
|
||||
|
||||
page.on("console", self._handle_console_event)
|
||||
|
||||
await page.add_init_script(self.js("exfiltrate"))
|
||||
await page.evaluate(self.js("exfiltrate"))
|
||||
|
||||
LOG.info(f"{self.class_name} setup complete on page.", url=page.url)
|
||||
@@ -135,8 +149,12 @@ class ExfiltrationChannel(CdpChannel):
|
||||
|
||||
async def decorate(self, page: Page) -> t.Self:
|
||||
"""Add a mouse-following follower to the page."""
|
||||
if page.url.startswith("devtools:"):
|
||||
return self
|
||||
|
||||
LOG.info(f"{self.class_name} adding decoration to page.", url=page.url)
|
||||
|
||||
await page.add_init_script(self.js("decorate"))
|
||||
await page.evaluate(self.js("decorate"))
|
||||
|
||||
LOG.info(f"{self.class_name} decoration setup complete on page.", url=page.url)
|
||||
@@ -145,8 +163,12 @@ class ExfiltrationChannel(CdpChannel):
|
||||
|
||||
async def undecorate(self, page: Page) -> t.Self:
|
||||
"""Remove the mouse-following follower from the page."""
|
||||
if page.url.startswith("devtools:"):
|
||||
return self
|
||||
|
||||
LOG.info(f"{self.class_name} removing decoration from page.", url=page.url)
|
||||
|
||||
await page.add_init_script(self.js("undecorate"))
|
||||
await page.evaluate(self.js("undecorate"))
|
||||
|
||||
LOG.info(f"{self.class_name} decoration removed from page.", url=page.url)
|
||||
@@ -174,10 +196,35 @@ class ExfiltrationChannel(CdpChannel):
|
||||
cdp_session.on("Target.targetCreated", lambda params: self._handle_cdp_event("target_created", params))
|
||||
cdp_session.on("Target.targetDestroyed", lambda params: self._handle_cdp_event("target_destroyed", params))
|
||||
cdp_session.on("Target.targetInfoChanged", lambda params: self._handle_cdp_event("target_info_changed", params))
|
||||
cdp_session.on("Page.frameNavigated", lambda params: self._handle_cdp_event("frame_navigated", params))
|
||||
cdp_session.on(
|
||||
"Page.navigatedWithinDocument", lambda params: self._handle_cdp_event("navigated_within_document", params)
|
||||
"Page.frameRequestedNavigation",
|
||||
lambda params: self._handle_cdp_event("nav:frame_requested_navigation", params),
|
||||
)
|
||||
cdp_session.on(
|
||||
"Page.frameStartedNavigating", lambda params: self._handle_cdp_event("nav:frame_started_navigating", params)
|
||||
)
|
||||
cdp_session.on("Page.frameNavigated", lambda params: self._handle_cdp_event("nav:frame_navigated", params))
|
||||
cdp_session.on(
|
||||
"Page.navigatedWithinDocument",
|
||||
lambda params: self._handle_cdp_event("nav:navigated_within_document", params),
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
async def enable_adornment(self) -> t.Self:
|
||||
browser_context = self.browser_context
|
||||
|
||||
if not browser_context:
|
||||
LOG.error(f"{self.class_name} no browser context to enable adornment.")
|
||||
return self
|
||||
|
||||
tasks: list[asyncio.Task] = []
|
||||
for page in browser_context.pages:
|
||||
tasks.append(asyncio.create_task(self.adorn(page)))
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
browser_context.on("page", lambda page: asyncio.create_task(self.adorn(page)))
|
||||
|
||||
return self
|
||||
|
||||
@@ -214,6 +261,8 @@ class ExfiltrationChannel(CdpChannel):
|
||||
|
||||
await self.enable_cdp_events()
|
||||
|
||||
await self.enable_adornment()
|
||||
|
||||
self.enable_console_events()
|
||||
|
||||
self.enable_decoration()
|
||||
@@ -236,7 +285,10 @@ class ExfiltrationChannel(CdpChannel):
|
||||
pages = self.browser_context.pages if self.browser_context else []
|
||||
|
||||
for page in pages:
|
||||
page.remove_listener("console", self._handle_console_event)
|
||||
try:
|
||||
page.remove_listener("console", self._handle_console_event)
|
||||
except KeyError:
|
||||
pass # listener not found
|
||||
await self.undecorate(page)
|
||||
|
||||
LOG.info(f"{self.class_name} stopped.")
|
||||
|
||||
97
skyvern/forge/sdk/routes/streaming/channels/js/adorn.js
Normal file
97
skyvern/forge/sdk/routes/streaming/channels/js/adorn.js
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* DOM-adornment: assign stable identifiers to all DOM elements.
|
||||
*/
|
||||
|
||||
(function () {
|
||||
console.log("[SYS] adornment evaluated");
|
||||
|
||||
window.__skyvern_assignedEls = window.__skyvern_assignedEls ?? 0;
|
||||
|
||||
const visited = (window.__skyvern_visited =
|
||||
window.__skyvern_visited ?? new Set());
|
||||
|
||||
function __skyvern_generateUniqueId() {
|
||||
const timestamp = Date.now().toString(36);
|
||||
const randomPart = Math.random().toString(36).substring(2);
|
||||
|
||||
return `sky-${timestamp}-${randomPart}`;
|
||||
}
|
||||
|
||||
window.__skyvern_generateUniqueId = __skyvern_generateUniqueId;
|
||||
|
||||
function __skyvern_assignSkyIds(node) {
|
||||
if (!node) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (node.nodeType === 1) {
|
||||
if (!node.dataset.skyId) {
|
||||
window.__skyvern_assignedEls += 1;
|
||||
node.dataset.skyId = __skyvern_generateUniqueId();
|
||||
}
|
||||
|
||||
if (visited.has(node)) {
|
||||
return;
|
||||
}
|
||||
|
||||
visited.add(node);
|
||||
|
||||
const children = node.querySelectorAll("*");
|
||||
|
||||
children.forEach((child) => {
|
||||
__skyvern_assignSkyIds(child);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (document.body) {
|
||||
__skyvern_assignSkyIds(document.body);
|
||||
console.log(
|
||||
"[SYS] adornment: initially assigned skyIds to elements:",
|
||||
window.__skyvern_assignedEls,
|
||||
);
|
||||
}
|
||||
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
__skyvern_assignSkyIds(document.body);
|
||||
console.log(
|
||||
"[SYS] adornment: assigned skyIds to elements on DOMContentLoaded:",
|
||||
window.__skyvern_assignedEls,
|
||||
);
|
||||
});
|
||||
|
||||
const observerConfig = {
|
||||
childList: true,
|
||||
subtree: true,
|
||||
};
|
||||
|
||||
const observer = new MutationObserver(function (mutationsList) {
|
||||
for (const mutation of mutationsList) {
|
||||
if (mutation.type === "childList") {
|
||||
mutation.addedNodes.forEach((node) => {
|
||||
__skyvern_assignSkyIds(node);
|
||||
console.log(
|
||||
"[SYS] adornment: assigned skyIds to new elements:",
|
||||
window.__skyvern_assignedEls,
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
function observeWhenReady() {
|
||||
if (document.body) {
|
||||
observer.observe(document.body, observerConfig);
|
||||
} else {
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
if (document.body) {
|
||||
observer.observe(document.body, observerConfig);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
observeWhenReady();
|
||||
|
||||
window.__skyvern_adornment_observer = observer;
|
||||
})();
|
||||
@@ -1,109 +1,118 @@
|
||||
(function () {
|
||||
if (!window.__skyvern_decoration_initialized) {
|
||||
window.__skyvern_decoration_initialized = true;
|
||||
console.log("[SYS] decorate: evaluated");
|
||||
|
||||
window.__skyvern_create_mouse_follower = function () {
|
||||
// create the circle element
|
||||
const existingCircle = document.getElementById(
|
||||
"__skyvern_mouse_follower",
|
||||
);
|
||||
function initiate() {
|
||||
if (!window.__skyvern_decoration_initialized) {
|
||||
console.log("[SYS] decorate: initializing");
|
||||
|
||||
if (existingCircle) {
|
||||
return false;
|
||||
}
|
||||
window.__skyvern_decoration_initialized = true;
|
||||
|
||||
const circle = document.createElement("div");
|
||||
window.__skyvern_decoration_mouse_follower = circle;
|
||||
circle.id = "__skyvern_mouse_follower";
|
||||
circle.style.position = "fixed";
|
||||
circle.style.left = "0";
|
||||
circle.style.top = "0";
|
||||
circle.style.width = "30px";
|
||||
circle.style.height = "30px";
|
||||
circle.style.borderRadius = "50%";
|
||||
circle.style.backgroundColor = "rgba(255, 0, 0, 0.2)";
|
||||
circle.style.pointerEvents = "none";
|
||||
circle.style.zIndex = "999999";
|
||||
circle.style.willChange = "transform";
|
||||
document.body.appendChild(circle);
|
||||
window.__skyvern_create_mouse_follower = function () {
|
||||
const preexistingCircles = document.querySelectorAll(
|
||||
"#__skyvern_mouse_follower",
|
||||
);
|
||||
|
||||
return true;
|
||||
};
|
||||
if (preexistingCircles.length > 0) {
|
||||
for (const circle of preexistingCircles) {
|
||||
circle.remove();
|
||||
}
|
||||
}
|
||||
|
||||
const wasCreated = window.__skyvern_create_mouse_follower();
|
||||
const circle = document.createElement("div");
|
||||
window.__skyvern_decoration_mouse_follower = circle;
|
||||
circle.id = "__skyvern_mouse_follower";
|
||||
circle.style.position = "fixed";
|
||||
circle.style.left = "0";
|
||||
circle.style.top = "0";
|
||||
circle.style.width = "30px";
|
||||
circle.style.height = "30px";
|
||||
circle.style.borderRadius = "50%";
|
||||
circle.style.backgroundColor = "rgba(255, 0, 0, 0.2)";
|
||||
circle.style.pointerEvents = "none";
|
||||
circle.style.zIndex = "999999";
|
||||
circle.style.willChange = "transform";
|
||||
document.body.appendChild(circle);
|
||||
};
|
||||
|
||||
if (!wasCreated) {
|
||||
return;
|
||||
}
|
||||
window.__skyvern_create_mouse_follower();
|
||||
|
||||
let scale = 1;
|
||||
let targetScale = 1;
|
||||
let mouseX = 0;
|
||||
let mouseY = 0;
|
||||
let scale = 1;
|
||||
let targetScale = 1;
|
||||
let mouseX = 0;
|
||||
let mouseY = 0;
|
||||
|
||||
// smooth scale animation
|
||||
function animate() {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
|
||||
const follower = window.__skyvern_decoration_mouse_follower;
|
||||
|
||||
scale += (targetScale - scale) * 0.2;
|
||||
|
||||
if (Math.abs(targetScale - scale) > 0.001) {
|
||||
requestAnimationFrame(animate);
|
||||
} else {
|
||||
scale = targetScale;
|
||||
}
|
||||
|
||||
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
||||
}
|
||||
|
||||
// update follower position on mouse move
|
||||
document.addEventListener(
|
||||
"mousemove",
|
||||
(e) => {
|
||||
// smooth scale animation
|
||||
function animate() {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
|
||||
const follower = window.__skyvern_decoration_mouse_follower;
|
||||
mouseX = e.clientX;
|
||||
mouseY = e.clientY;
|
||||
|
||||
scale += (targetScale - scale) * 0.2;
|
||||
|
||||
if (Math.abs(targetScale - scale) > 0.001) {
|
||||
requestAnimationFrame(animate);
|
||||
} else {
|
||||
scale = targetScale;
|
||||
}
|
||||
|
||||
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
||||
},
|
||||
true,
|
||||
);
|
||||
}
|
||||
|
||||
// expand follower on mouse down
|
||||
document.addEventListener(
|
||||
"mousedown",
|
||||
() => {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
// update follower position on mouse move
|
||||
document.addEventListener(
|
||||
"mousemove",
|
||||
(e) => {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
|
||||
targetScale = 50 / 30;
|
||||
requestAnimationFrame(animate);
|
||||
},
|
||||
true,
|
||||
);
|
||||
const follower = window.__skyvern_decoration_mouse_follower;
|
||||
mouseX = e.clientX;
|
||||
mouseY = e.clientY;
|
||||
follower.style.transform = `translate(${mouseX - 15}px, ${mouseY - 15}px) scale(${scale})`;
|
||||
},
|
||||
true,
|
||||
);
|
||||
|
||||
// return follower to original size on mouse up
|
||||
document.addEventListener(
|
||||
"mouseup",
|
||||
() => {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
// expand follower on mouse down
|
||||
document.addEventListener(
|
||||
"mousedown",
|
||||
() => {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
|
||||
targetScale = 1;
|
||||
requestAnimationFrame(animate);
|
||||
},
|
||||
true,
|
||||
);
|
||||
targetScale = 50 / 30;
|
||||
requestAnimationFrame(animate);
|
||||
},
|
||||
true,
|
||||
);
|
||||
|
||||
// return follower to original size on mouse up
|
||||
document.addEventListener(
|
||||
"mouseup",
|
||||
() => {
|
||||
if (!window.__skyvern_decoration_mouse_follower) {
|
||||
return;
|
||||
}
|
||||
|
||||
targetScale = 1;
|
||||
requestAnimationFrame(animate);
|
||||
},
|
||||
true,
|
||||
);
|
||||
} else {
|
||||
window.__skyvern_create_mouse_follower();
|
||||
}
|
||||
}
|
||||
|
||||
if (document.body) {
|
||||
console.log("[SYS] decorate: document already loaded, initiating");
|
||||
initiate();
|
||||
} else {
|
||||
window.__skyvern_create_mouse_follower();
|
||||
console.log("[SYS] decorate: waiting for DOMContentLoaded to initiate");
|
||||
document.addEventListener("DOMContentLoaded", initiate);
|
||||
}
|
||||
})();
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
(function () {
|
||||
console.log("[SYS] exfiltration: evaluated");
|
||||
if (!window.__skyvern_exfiltration_initialized) {
|
||||
console.log("[SYS] exfiltration: initializing");
|
||||
window.__skyvern_exfiltration_initialized = true;
|
||||
|
||||
[
|
||||
@@ -55,6 +57,10 @@
|
||||
const getElementText = (element) => {
|
||||
const textSources = [];
|
||||
|
||||
if (!element.getAttribute) {
|
||||
return textSources;
|
||||
}
|
||||
|
||||
if (element.getAttribute("aria-label")) {
|
||||
textSources.push(element.getAttribute("aria-label"));
|
||||
}
|
||||
@@ -96,6 +102,52 @@
|
||||
return textSources.length > 0 ? textSources : [];
|
||||
};
|
||||
|
||||
const skyId = e.target?.dataset?.skyId || null;
|
||||
|
||||
if (!skyId && e.target?.tagName !== "HTML") {
|
||||
console.log("[SYS] exfiltration: target element has no skyId.");
|
||||
|
||||
if (window.__skyvern_generateUniqueId && e.target?.dataset) {
|
||||
const newSkyId = window.__skyvern_generateUniqueId();
|
||||
e.target.dataset.skyId = newSkyId;
|
||||
console.log(
|
||||
`[SYS] exfiltration: assigned new skyId to target element: ${newSkyId}`,
|
||||
);
|
||||
} else {
|
||||
console.log(
|
||||
"[SYS] exfiltration: cannot assign skyId, generator not found.",
|
||||
);
|
||||
|
||||
const info = {
|
||||
tagName: e.target?.tagName,
|
||||
target: e.target,
|
||||
targetType: typeof e.target,
|
||||
eventType,
|
||||
id: e.target?.id,
|
||||
className: e.target?.className,
|
||||
value: e.target?.value,
|
||||
text: getElementText(e.target),
|
||||
labels: getAssociatedLabels(e.target),
|
||||
skyId: e.target?.dataset?.skyId,
|
||||
};
|
||||
|
||||
try {
|
||||
const infoS = JSON.stringify(info, null, 2);
|
||||
console.log(
|
||||
`[SYS] exfiltration: target element info: ${infoS}`,
|
||||
);
|
||||
} catch (err) {
|
||||
console.log(
|
||||
"[SYS] exfiltration: target element info: [unserializable]",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const classText = String(
|
||||
e.target.classList?.value ?? e.target.getAttribute("class") ?? "",
|
||||
);
|
||||
|
||||
const eventData = {
|
||||
url: window.location.href,
|
||||
type: eventType,
|
||||
@@ -103,10 +155,13 @@
|
||||
target: {
|
||||
tagName: e.target?.tagName,
|
||||
id: e.target?.id,
|
||||
className: e.target?.className,
|
||||
isHtml: e.target instanceof HTMLElement,
|
||||
isSvg: e.target instanceof SVGElement,
|
||||
className: classText,
|
||||
value: e.target?.value,
|
||||
text: getElementText(e.target),
|
||||
labels: getAssociatedLabels(e.target),
|
||||
skyId: e.target?.dataset?.skyId,
|
||||
},
|
||||
inputValue: ["input", "focus", "blur"].includes(eventType)
|
||||
? e.target?.value
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
(function () {
|
||||
console.log("[SYS] undecorate: evaluated");
|
||||
|
||||
const followers = document.querySelectorAll("#__skyvern_mouse_follower");
|
||||
|
||||
for (const follower of followers) {
|
||||
|
||||
@@ -122,6 +122,7 @@ class MessageOutExfiltratedEvent(Message):
|
||||
# TODO(jdo): improve typing for params
|
||||
params: dict = dataclasses.field(default_factory=dict)
|
||||
source: ExfiltratedEventSource = ExfiltratedEventSource.NOT_SPECIFIED
|
||||
timestamp: float = dataclasses.field(default_factory=lambda: 0.0) # seconds since epoch
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -433,6 +434,7 @@ async def loop_stream_messages(message_channel: MessageChannel) -> None:
|
||||
event_name=event.event_name,
|
||||
params=event.params,
|
||||
source=t.cast(ExfiltratedEventSource, event.source or ExfiltratedEventSource.NOT_SPECIFIED),
|
||||
timestamp=event.timestamp,
|
||||
)
|
||||
|
||||
message_channel.send_nowait(messages=[message_out_exfiltrated_event])
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from skyvern.client.types.workflow_definition_yaml_blocks_item import WorkflowDefinitionYamlBlocksItem
|
||||
from skyvern.client.types.workflow_definition_yaml_parameters_item import WorkflowDefinitionYamlParametersItem_Workflow
|
||||
from skyvern.schemas.docs.doc_strings import PROXY_LOCATION_DOC_STRING
|
||||
from skyvern.schemas.runs import ProxyLocation
|
||||
|
||||
@@ -19,3 +21,25 @@ class CreateBrowserSessionRequest(BaseModel):
|
||||
default=None,
|
||||
description=PROXY_LOCATION_DOC_STRING,
|
||||
)
|
||||
|
||||
|
||||
class ProcessBrowserSessionRecordingRequest(BaseModel):
|
||||
compressed_chunks: list[str] = Field(
|
||||
default=[],
|
||||
description="List of base64 encoded and compressed (gzip) event strings representing the browser session recording.",
|
||||
)
|
||||
workflow_permanent_id: str = Field(
|
||||
default="no-such-wpid",
|
||||
description="Permanent ID of the workflow associated with the browser session recording.",
|
||||
)
|
||||
|
||||
|
||||
class ProcessBrowserSessionRecordingResponse(BaseModel):
|
||||
blocks: list[WorkflowDefinitionYamlBlocksItem] = Field(
|
||||
default=[],
|
||||
description="List of workflow blocks generated from the processed browser session recording.",
|
||||
)
|
||||
parameters: list[WorkflowDefinitionYamlParametersItem_Workflow] = Field(
|
||||
default=[],
|
||||
description="List of workflow parameters generated from the processed browser session recording.",
|
||||
)
|
||||
|
||||
3
skyvern/services/browser_recording/__init__.py
Normal file
3
skyvern/services/browser_recording/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .service import BrowserSessionRecordingService
|
||||
|
||||
__all__ = ["BrowserSessionRecordingService"]
|
||||
491
skyvern/services/browser_recording/service.py
Normal file
491
skyvern/services/browser_recording/service.py
Normal file
@@ -0,0 +1,491 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import pathlib
|
||||
import typing as t
|
||||
import zlib
|
||||
|
||||
import structlog
|
||||
|
||||
import skyvern.services.browser_recording.state_machines as sm
|
||||
from skyvern.client.types.workflow_definition_yaml_blocks_item import (
|
||||
WorkflowDefinitionYamlBlocksItem_Action,
|
||||
WorkflowDefinitionYamlBlocksItem_GotoUrl,
|
||||
WorkflowDefinitionYamlBlocksItem_Wait,
|
||||
)
|
||||
from skyvern.client.types.workflow_definition_yaml_parameters_item import WorkflowDefinitionYamlParametersItem_Workflow
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ActionBlockable,
|
||||
ActionKind,
|
||||
ActionUrlChange,
|
||||
ActionWait,
|
||||
ExfiltratedCdpEvent,
|
||||
ExfiltratedConsoleEvent,
|
||||
ExfiltratedEvent,
|
||||
OutputBlock,
|
||||
)
|
||||
|
||||
LOG = structlog.get_logger(__name__)
|
||||
|
||||
# avoid decompression bombs
|
||||
MAX_BASE64_SIZE = 14 * 1024 * 1024 # ~10MB compressed + base64 overhead
|
||||
|
||||
|
||||
class Processor:
|
||||
"""
|
||||
Process browser session recordings into workflow definition blocks.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
browser_session_id: str,
|
||||
organization_id: str,
|
||||
workflow_permanent_id: str,
|
||||
) -> None:
|
||||
self.browser_session_id = browser_session_id
|
||||
self.organization_id = organization_id
|
||||
self.workflow_permanent_id = workflow_permanent_id
|
||||
|
||||
@property
|
||||
def class_name(self) -> str:
|
||||
return self.__class__.__name__
|
||||
|
||||
@property
|
||||
def identity(self) -> dict[str, str]:
|
||||
return dict(
|
||||
browser_session_id=self.browser_session_id,
|
||||
organization_id=self.organization_id,
|
||||
workflow_permanent_id=self.workflow_permanent_id,
|
||||
)
|
||||
|
||||
def decompress(self, base64_payload: str) -> bytes | None:
|
||||
"""
|
||||
Decode a base64 string, decompress it using gzip, and return it.
|
||||
"""
|
||||
|
||||
if len(base64_payload) > MAX_BASE64_SIZE:
|
||||
LOG.warning(f"{self.class_name}: base64 payload too large: {len(base64_payload)} bytes", **self.identity)
|
||||
return None
|
||||
|
||||
try:
|
||||
# base64 decode -> gzip binary data
|
||||
#
|
||||
# NOTE(llm): The data sent from btoa() is technically a "non-standard"
|
||||
# Base64, but Python's standard decoder is usually robust enough to
|
||||
# handle it.
|
||||
compressed_data: bytes = base64.b64decode(base64_payload)
|
||||
except Exception as ex:
|
||||
LOG.warning(f"{self.class_name} failed to decode Base64 payload", exc_info=ex, **self.identity)
|
||||
return None
|
||||
|
||||
try:
|
||||
# gzip decompression -> bytes
|
||||
#
|
||||
# NOTE(llm): We use zlib.decompress with wbits=16 + zlib.MAX_WBITS (31).
|
||||
# This tells zlib to automatically detect and handle Gzip headers,
|
||||
# which is essential since the browser used CompressionStream('gzip').
|
||||
# Using zlib is often faster than the higher-level gzip module for this
|
||||
# purpose.
|
||||
decompressed_bytes: bytes = zlib.decompress(compressed_data, wbits=16 + zlib.MAX_WBITS)
|
||||
except zlib.error as e:
|
||||
LOG.warning(f"{self.class_name} decompression error: {e}", **self.identity)
|
||||
# Log the error, maybe log the first few characters of the payload for debugging
|
||||
return None
|
||||
|
||||
return decompressed_bytes
|
||||
|
||||
def serialize(self, decompressed_bytes: bytes | None) -> list[dict[str, t.Any]]:
|
||||
"""
|
||||
Convert decompressed bytes into a list of events (Python list/dictionary).
|
||||
"""
|
||||
if not decompressed_bytes:
|
||||
LOG.warning(f"{self.class_name} No decompressed bytes to serialize", **self.identity)
|
||||
return []
|
||||
|
||||
try:
|
||||
# bytes -> JSON string
|
||||
json_string: str = decompressed_bytes.decode("utf-8")
|
||||
except Exception as e:
|
||||
LOG.warning(f"{self.class_name} decode error: {e}", **self.identity)
|
||||
return []
|
||||
|
||||
try:
|
||||
# JSON string -> list of dicts
|
||||
events_list: list[dict[str, t.Any]] = json.loads(json_string)
|
||||
except Exception as e:
|
||||
LOG.warning(f"{self.class_name} JSON parsing error: {e}", **self.identity)
|
||||
return []
|
||||
|
||||
if not isinstance(events_list, list):
|
||||
LOG.warning(f"{self.class_name} Expected a list of events, got:", type(events_list), **self.identity)
|
||||
return []
|
||||
|
||||
return events_list
|
||||
|
||||
def reify(self, events_list: list[dict[str, t.Any]]) -> list[ExfiltratedEvent]:
|
||||
"""
|
||||
Convert a list of event dictionaries into a list of `ExfiltratedEvent`s.
|
||||
"""
|
||||
|
||||
if not events_list:
|
||||
LOG.warning(f"{self.class_name} No events to reify", **self.identity)
|
||||
return []
|
||||
|
||||
reified_events: list[ExfiltratedEvent] = []
|
||||
for event in events_list:
|
||||
if event.get("source") == "cdp":
|
||||
try:
|
||||
reified_event = ExfiltratedCdpEvent(**event)
|
||||
except Exception as e:
|
||||
LOG.warning(f"{self.class_name} Failed to reify CDP event: {e}", **self.identity)
|
||||
continue
|
||||
elif event.get("source") == "console":
|
||||
try:
|
||||
reified_event = ExfiltratedConsoleEvent(**event)
|
||||
except Exception as e:
|
||||
LOG.warning(f"{self.class_name} Failed to reify console event: {e}", **self.identity)
|
||||
continue
|
||||
else:
|
||||
LOG.error(f"{self.class_name} Unknown event source: {event.get('source')}", **self.identity)
|
||||
continue
|
||||
reified_events.append(reified_event)
|
||||
|
||||
return reified_events
|
||||
|
||||
def compressed_chunks_to_events(self, compressed_chunks: list[str]) -> list[ExfiltratedEvent]:
|
||||
"""
|
||||
Convert a list of base64 encoded and compressed (gzip) event strings into
|
||||
a list of `ExfiltratedEvent`s.
|
||||
"""
|
||||
all_events: list[ExfiltratedEvent] = []
|
||||
|
||||
for compressed_chunk in compressed_chunks:
|
||||
decompressed = self.decompress(compressed_chunk)
|
||||
serialized = self.serialize(decompressed)
|
||||
reified = self.reify(serialized)
|
||||
all_events.extend(reified)
|
||||
|
||||
return all_events
|
||||
|
||||
def events_to_actions(
|
||||
self,
|
||||
events: list[ExfiltratedEvent],
|
||||
machines: list[sm.StateMachine] | None = None,
|
||||
) -> list[Action]:
|
||||
"""
|
||||
Convert a list of `ExfiltratedEvent`s into `Action`s.
|
||||
"""
|
||||
actions: list[Action] = []
|
||||
|
||||
machines = machines or [
|
||||
sm.Click(),
|
||||
sm.Hover(),
|
||||
sm.InputText(),
|
||||
sm.UrlChange(),
|
||||
sm.Wait(),
|
||||
]
|
||||
|
||||
for event in events:
|
||||
for machine in machines:
|
||||
action = machine.tick(event, actions)
|
||||
|
||||
if not action:
|
||||
continue
|
||||
|
||||
allow_action = True
|
||||
|
||||
for m in machines:
|
||||
if not m.on_action(action, actions):
|
||||
allow_action = False
|
||||
LOG.debug(
|
||||
f"{self.class_name} action vetoed by state machine {m.__class__.__name__}",
|
||||
action=action,
|
||||
**self.identity,
|
||||
)
|
||||
|
||||
if allow_action:
|
||||
actions.append(action)
|
||||
else:
|
||||
# if an action was vetoed, we do not allow further processing
|
||||
# of this event through subsequent state machines
|
||||
break
|
||||
|
||||
return actions
|
||||
|
||||
def dedupe_block_labels(self, suspects: list[OutputBlock]) -> list[OutputBlock]:
|
||||
"""
|
||||
Detect if any block labels are duplicated, and, if so, rename them for
|
||||
uniqueness.
|
||||
"""
|
||||
|
||||
blocks: list[OutputBlock] = []
|
||||
labels: set[str] = set()
|
||||
|
||||
for block in suspects:
|
||||
if block.label not in labels:
|
||||
labels.add(block.label)
|
||||
blocks.append(block)
|
||||
continue
|
||||
else:
|
||||
original_label = block.label
|
||||
count = 0
|
||||
while True:
|
||||
new_label = f"{original_label}_{count}"
|
||||
if new_label not in labels:
|
||||
cls = block.__class__
|
||||
data = block.model_dump() | {"label": new_label}
|
||||
new_block = cls(**data)
|
||||
blocks.append(new_block)
|
||||
labels.add(new_label)
|
||||
break
|
||||
count += 1
|
||||
|
||||
return blocks
|
||||
|
||||
async def actions_to_blocks(self, actions: list[Action]) -> list[OutputBlock]:
|
||||
"""
|
||||
Convert a list of `Action` objects into workflow definition (YAML) blocks.
|
||||
"""
|
||||
tasks: list[asyncio.Task] = []
|
||||
|
||||
for action in actions:
|
||||
action_kind = action.kind.value
|
||||
|
||||
match action.kind:
|
||||
case ActionKind.CLICK | ActionKind.HOVER | ActionKind.INPUT_TEXT:
|
||||
task = asyncio.create_task(self.create_action_block(action))
|
||||
tasks.append(task)
|
||||
case ActionKind.URL_CHANGE:
|
||||
task = asyncio.create_task(self.create_url_block(action))
|
||||
tasks.append(task)
|
||||
case ActionKind.WAIT:
|
||||
task = asyncio.create_task(self.create_wait_block(action))
|
||||
tasks.append(task)
|
||||
case _:
|
||||
LOG.warning(
|
||||
f"{self.class_name} Unknown action kind: {action_kind}",
|
||||
action=action,
|
||||
**self.identity,
|
||||
)
|
||||
continue
|
||||
|
||||
blocks: list[OutputBlock] = await asyncio.gather(*tasks)
|
||||
|
||||
blocks = self.dedupe_block_labels(blocks)
|
||||
|
||||
return blocks
|
||||
|
||||
def blocks_to_parameters(self, blocks: list[OutputBlock]) -> list[WorkflowDefinitionYamlParametersItem_Workflow]:
|
||||
"""
|
||||
Convert a list of workflow definition (YAML) blocks into workflow definition (YAML) parameters.
|
||||
"""
|
||||
parameter_names: set[str] = set()
|
||||
|
||||
for block in blocks:
|
||||
if isinstance(block, WorkflowDefinitionYamlBlocksItem_Action):
|
||||
for param_name in block.parameter_keys or []:
|
||||
parameter_names.add(param_name)
|
||||
|
||||
parameters: list[WorkflowDefinitionYamlParametersItem_Workflow] = []
|
||||
|
||||
for param_name in parameter_names:
|
||||
parameter = WorkflowDefinitionYamlParametersItem_Workflow(
|
||||
key=param_name,
|
||||
workflow_parameter_type="string",
|
||||
default_value="",
|
||||
description="",
|
||||
)
|
||||
parameters.append(parameter)
|
||||
|
||||
return parameters
|
||||
|
||||
async def create_action_block(self, action: ActionBlockable) -> WorkflowDefinitionYamlBlocksItem_Action:
|
||||
"""
|
||||
Create a YAML action block from an `ActionBlockable`.
|
||||
"""
|
||||
|
||||
DEFAULT_BLOCK_TITLE = "Browser Action"
|
||||
|
||||
if action.kind == ActionKind.INPUT_TEXT:
|
||||
prompt_name = "recording-action-block-prompt-input-text"
|
||||
else:
|
||||
prompt_name = "recording-action-block-prompt"
|
||||
|
||||
metadata_prompt = prompt_engine.load_prompt(
|
||||
prompt_name,
|
||||
action=action,
|
||||
)
|
||||
|
||||
metadata_response = await app.LLM_API_HANDLER(
|
||||
prompt=metadata_prompt,
|
||||
prompt_name=prompt_name,
|
||||
organization_id=self.organization_id,
|
||||
)
|
||||
|
||||
block_label: str = metadata_response.get("block_label", None) or "act"
|
||||
title: str = metadata_response.get("title", None) or DEFAULT_BLOCK_TITLE
|
||||
navigation_goal: str = metadata_response.get("prompt", "")
|
||||
parameter_name: dict | None = metadata_response.get("parameter_name", None)
|
||||
|
||||
block = WorkflowDefinitionYamlBlocksItem_Action(
|
||||
label=block_label,
|
||||
title=title,
|
||||
navigation_goal=navigation_goal,
|
||||
error_code_mapping=None,
|
||||
parameters=[parameter_name] if parameter_name else [], # sic(jdo): the frontend requires this
|
||||
parameter_keys=[parameter_name.get("key")] if parameter_name else [],
|
||||
)
|
||||
|
||||
return block
|
||||
|
||||
async def create_url_block(self, action: ActionUrlChange) -> WorkflowDefinitionYamlBlocksItem_GotoUrl:
|
||||
"""
|
||||
Create a YAML goto URL block from an `ActionUrlChange`.
|
||||
"""
|
||||
|
||||
prompt_name = "recording-go-to-url-block-prompt"
|
||||
|
||||
metadata_prompt = prompt_engine.load_prompt(
|
||||
prompt_name,
|
||||
action=action,
|
||||
)
|
||||
|
||||
metadata_response = await app.LLM_API_HANDLER(
|
||||
prompt=metadata_prompt,
|
||||
prompt_name=prompt_name,
|
||||
organization_id=self.organization_id,
|
||||
)
|
||||
|
||||
block_label: str = metadata_response.get("block_label", None) or "goto_url"
|
||||
|
||||
block = WorkflowDefinitionYamlBlocksItem_GotoUrl(
|
||||
label=block_label,
|
||||
url=action.url,
|
||||
)
|
||||
|
||||
return block
|
||||
|
||||
async def create_wait_block(self, action: ActionWait) -> WorkflowDefinitionYamlBlocksItem_Wait:
|
||||
"""
|
||||
Create a YAML wait block from an `ActionWait`.
|
||||
"""
|
||||
|
||||
prompt_name = "recording-wait-block-prompt"
|
||||
|
||||
metadata_prompt = prompt_engine.load_prompt(
|
||||
prompt_name,
|
||||
action=action,
|
||||
)
|
||||
|
||||
metadata_response = await app.LLM_API_HANDLER(
|
||||
prompt=metadata_prompt,
|
||||
prompt_name=prompt_name,
|
||||
organization_id=self.organization_id,
|
||||
)
|
||||
|
||||
block_label: str = metadata_response.get("block_label", None) or "wait"
|
||||
|
||||
block = WorkflowDefinitionYamlBlocksItem_Wait(
|
||||
label=block_label,
|
||||
wait_sec=int(max(action.duration_ms / 1000.0, ActionWait.MIN_DURATION_THRESHOLD_MS / 1000.0)),
|
||||
)
|
||||
|
||||
return block
|
||||
|
||||
async def process(
|
||||
self, compressed_chunks: list[str]
|
||||
) -> tuple[list[OutputBlock], list[WorkflowDefinitionYamlParametersItem_Workflow]]:
|
||||
"""
|
||||
Process the compressed browser session recording into workflow definition blocks.
|
||||
"""
|
||||
|
||||
events = self.compressed_chunks_to_events(compressed_chunks)
|
||||
actions = self.events_to_actions(events)
|
||||
blocks = await self.actions_to_blocks(actions)
|
||||
parameters = self.blocks_to_parameters(blocks)
|
||||
|
||||
return blocks, parameters
|
||||
|
||||
|
||||
class BrowserSessionRecordingService:
|
||||
async def process_recording(
|
||||
self,
|
||||
browser_session_id: str,
|
||||
organization_id: str,
|
||||
workflow_permanent_id: str,
|
||||
compressed_chunks: list[str],
|
||||
) -> tuple[list[OutputBlock], list[WorkflowDefinitionYamlParametersItem_Workflow]]:
|
||||
"""
|
||||
Process compressed browser session recording events into workflow definition blocks.
|
||||
"""
|
||||
processor = Processor(
|
||||
browser_session_id,
|
||||
organization_id,
|
||||
workflow_permanent_id,
|
||||
)
|
||||
|
||||
return await processor.process(compressed_chunks)
|
||||
|
||||
|
||||
async def smoke() -> None:
|
||||
with open(pathlib.Path("/path/to/uncompressed/events.json")) as f:
|
||||
raw_events: list[dict] = json.load(f)
|
||||
|
||||
events: list[ExfiltratedEvent] = []
|
||||
|
||||
for i, raw_event in enumerate(raw_events):
|
||||
if not isinstance(raw_event, dict):
|
||||
LOG.debug(f"~ skipping non-dict event: {raw_event}")
|
||||
continue
|
||||
if raw_event.get("source") == "cdp":
|
||||
try:
|
||||
event = ExfiltratedCdpEvent(**raw_event)
|
||||
except Exception:
|
||||
LOG.exception(f"{i} Failed to parse exfiltrated CDP event")
|
||||
LOG.debug(f"~ raw event: {json.dumps(raw_event, sort_keys=True, indent=2)}")
|
||||
continue
|
||||
events.append(event)
|
||||
elif raw_event.get("source") == "console":
|
||||
event = ExfiltratedConsoleEvent(**raw_event)
|
||||
events.append(event)
|
||||
|
||||
LOG.debug(f"{len(events)} events.")
|
||||
|
||||
my_local_org_id = "o_389844905020748346"
|
||||
processor = Processor("pbs_123", my_local_org_id, "wpid_123")
|
||||
actions = processor.events_to_actions(events)
|
||||
|
||||
LOG.debug(f"{len(actions)} actions:")
|
||||
|
||||
for action in actions:
|
||||
id = action.target.sky_id if action.target.sky_id else action.target.id
|
||||
text = ",".join(action.target.texts or [])
|
||||
LOG.debug(f" {action.kind} [{id}] [{text}] @ {action.url}")
|
||||
|
||||
blocks = await processor.actions_to_blocks(actions)
|
||||
|
||||
LOG.debug(f"{len(blocks)} blocks:")
|
||||
|
||||
for block in blocks:
|
||||
LOG.debug(f" {block.label}")
|
||||
|
||||
if isinstance(block, WorkflowDefinitionYamlBlocksItem_Action):
|
||||
LOG.debug(f" title: {block.title}")
|
||||
LOG.debug(f" nav goal: {block.navigation_goal}")
|
||||
|
||||
if isinstance(block, WorkflowDefinitionYamlBlocksItem_GotoUrl):
|
||||
LOG.debug(f" url: {block.url}")
|
||||
|
||||
if isinstance(block, WorkflowDefinitionYamlBlocksItem_Wait):
|
||||
LOG.debug(f" wait sec: {block.wait_sec}")
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# from skyvern.forge.forge_app_initializer import start_forge_app
|
||||
|
||||
# start_forge_app()
|
||||
|
||||
# asyncio.run(smoke())
|
||||
@@ -0,0 +1,15 @@
|
||||
from .click import StateMachineClick as Click
|
||||
from .hover import StateMachineHover as Hover
|
||||
from .input_text import StateMachineInputText as InputText
|
||||
from .state_machine import StateMachine
|
||||
from .url_change import StateMachineUrlChange as UrlChange
|
||||
from .wait import StateMachineWait as Wait
|
||||
|
||||
__all__ = [
|
||||
"Click",
|
||||
"Hover",
|
||||
"InputText",
|
||||
"StateMachine",
|
||||
"UrlChange",
|
||||
"Wait",
|
||||
]
|
||||
89
skyvern/services/browser_recording/state_machines/click.py
Normal file
89
skyvern/services/browser_recording/state_machines/click.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import typing as t
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ActionClick,
|
||||
ActionKind,
|
||||
ActionTarget,
|
||||
EventTarget,
|
||||
ExfiltratedEvent,
|
||||
Mouse,
|
||||
MousePosition,
|
||||
)
|
||||
|
||||
from .state_machine import StateMachine
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class StateMachineClick(StateMachine):
|
||||
state: t.Literal["void"] = "void"
|
||||
target: EventTarget | None = None
|
||||
timestamp: float | None = None
|
||||
mouse: MousePosition | None = None
|
||||
url: str | None = None
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.reset()
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionClick | None:
|
||||
if event.source != "console":
|
||||
return None
|
||||
|
||||
if event.params.type != "click":
|
||||
if event.params.mousePosition:
|
||||
if event.params.mousePosition.xp is not None and event.params.mousePosition.yp is not None:
|
||||
self.mouse = event.params.mousePosition
|
||||
return None
|
||||
|
||||
LOG.debug(f"~ click detected [{event.params.target.skyId or event.params.target.id}]")
|
||||
|
||||
self.target = event.params.target
|
||||
self.timestamp = event.params.timestamp
|
||||
self.url = event.params.url
|
||||
|
||||
if event.params.mousePosition:
|
||||
self.mouse = event.params.mousePosition
|
||||
|
||||
return self.emit(event)
|
||||
|
||||
def emit(self, event: ExfiltratedEvent) -> ActionClick | None:
|
||||
if not self.target:
|
||||
LOG.debug("~ cannot emit click, missing target; resetting")
|
||||
self.reset()
|
||||
return None
|
||||
|
||||
xp = (self.mouse.xp or -1) if self.mouse else None
|
||||
yp = (self.mouse.yp or -1) if self.mouse else None
|
||||
|
||||
LOG.debug("~ emitting click action", exfiltrated_event=event)
|
||||
|
||||
action_target = ActionTarget(
|
||||
class_name=self.target.className,
|
||||
id=self.target.id,
|
||||
mouse=Mouse(xp=xp, yp=yp),
|
||||
sky_id=self.target.skyId,
|
||||
tag_name=self.target.tagName,
|
||||
texts=self.target.text,
|
||||
)
|
||||
|
||||
action = ActionClick(
|
||||
kind=ActionKind.CLICK.value,
|
||||
target=action_target,
|
||||
timestamp_start=self.timestamp,
|
||||
timestamp_end=self.timestamp,
|
||||
url=self.url,
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
return action
|
||||
|
||||
def reset(self) -> None:
|
||||
self.state = "void"
|
||||
self.target = None
|
||||
self.timestamp = None
|
||||
self.mouse = None
|
||||
self.url = None
|
||||
154
skyvern/services/browser_recording/state_machines/hover.py
Normal file
154
skyvern/services/browser_recording/state_machines/hover.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import typing as t
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ActionHover,
|
||||
ActionKind,
|
||||
ActionTarget,
|
||||
EventTarget,
|
||||
ExfiltratedEvent,
|
||||
Mouse,
|
||||
MousePosition,
|
||||
target_has_changed,
|
||||
)
|
||||
|
||||
from .state_machine import StateMachine
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class StateMachineHover(StateMachine):
|
||||
state: t.Literal["mouseenter", "next-event"] = "mouseenter"
|
||||
target: EventTarget | None = None
|
||||
timestamp_start: float | None = None
|
||||
timestamp_end: float | None = None
|
||||
mouse: MousePosition | None = None
|
||||
# --
|
||||
threshold_ms: int = ActionHover.DURATION_THRESHOLD_MS
|
||||
|
||||
def __init__(self, threshold_ms: int | None = None) -> None:
|
||||
self.threshold_ms = max(
|
||||
threshold_ms or ActionHover.DURATION_THRESHOLD_MS,
|
||||
ActionHover.MIN_DURATION_THRESHOLD_MS,
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionHover | None:
|
||||
if event.source != "console":
|
||||
return None
|
||||
|
||||
match self.state:
|
||||
case "mouseenter":
|
||||
if event.params.mousePosition:
|
||||
if event.params.mousePosition.xp is not None and event.params.mousePosition.yp is not None:
|
||||
self.mouse = event.params.mousePosition
|
||||
|
||||
if event.params.type != "mouseenter":
|
||||
return None
|
||||
|
||||
text = ",".join(event.params.target.text or [])
|
||||
LOG.debug(
|
||||
f"~ hover: mouseenter detected [{event.params.target.skyId or event.params.target.id}] [{text}]"
|
||||
)
|
||||
|
||||
self.target = event.params.target
|
||||
self.state = "next-event"
|
||||
self.timestamp_start = event.params.timestamp
|
||||
|
||||
case "next-event":
|
||||
if event.params.type == "mouseenter":
|
||||
if target_has_changed(self.target, event.params.target):
|
||||
self.timestamp_start = event.params.timestamp
|
||||
self.target = event.params.target
|
||||
return None
|
||||
|
||||
event_end = event.params.timestamp
|
||||
|
||||
if not self.timestamp_start:
|
||||
LOG.debug("~ missing hover start timestamp, resetting")
|
||||
self.reset()
|
||||
|
||||
return None
|
||||
|
||||
duration_ms = int(event_end - self.timestamp_start)
|
||||
|
||||
if event.params.type == "mousemove":
|
||||
target = event.params.target
|
||||
|
||||
if not target_has_changed(self.target, target):
|
||||
return None
|
||||
|
||||
if duration_ms < self.threshold_ms:
|
||||
LOG.debug(f"~ hover duration {duration_ms}ms below threshold {self.threshold_ms}ms, resetting")
|
||||
self.reset()
|
||||
|
||||
return None
|
||||
|
||||
self.timestamp_end = event.params.timestamp
|
||||
LOG.debug(
|
||||
f"~ hover duration {duration_ms}ms meets threshold {self.threshold_ms}ms, emitting",
|
||||
start=self.timestamp_start,
|
||||
end=self.timestamp_end,
|
||||
)
|
||||
|
||||
# dedupe consecutive hover actions on same target
|
||||
if current_actions:
|
||||
last_action = current_actions[-1]
|
||||
|
||||
if last_action.kind == ActionKind.HOVER and self.target:
|
||||
target_id = self.target.skyId or self.target.id
|
||||
last_action_target_id = last_action.target.sky_id or last_action.target.id
|
||||
|
||||
if target_id:
|
||||
if target_id == last_action_target_id:
|
||||
LOG.debug("~ hover: duplicate hover action - skipping", target=self.target)
|
||||
self.reset()
|
||||
|
||||
return None
|
||||
|
||||
return self.emit(event)
|
||||
|
||||
return None
|
||||
|
||||
def emit(self, event: ExfiltratedEvent) -> ActionHover | None:
|
||||
if not self.target:
|
||||
LOG.debug("~ cannot emit hover, missing target; resetting")
|
||||
self.reset()
|
||||
|
||||
return None
|
||||
|
||||
xp = (self.mouse.xp or -1) if self.mouse else None
|
||||
yp = (self.mouse.yp or -1) if self.mouse else None
|
||||
|
||||
LOG.debug("~ emitting hover action", exfiltrated_event=event)
|
||||
|
||||
action_target = ActionTarget(
|
||||
class_name=self.target.className,
|
||||
id=self.target.id,
|
||||
mouse=Mouse(xp=xp, yp=yp),
|
||||
sky_id=self.target.skyId,
|
||||
tag_name=self.target.tagName,
|
||||
texts=self.target.text,
|
||||
)
|
||||
|
||||
action = ActionHover(
|
||||
kind=ActionKind.HOVER.value,
|
||||
target=action_target,
|
||||
timestamp_start=self.timestamp_start or -1,
|
||||
timestamp_end=self.timestamp_end or -1,
|
||||
url=event.params.url,
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
return action
|
||||
|
||||
def reset(self) -> None:
|
||||
self.state = "mouseenter"
|
||||
self.target = None
|
||||
self.timestamp_start = None
|
||||
self.timestamp_end = None
|
||||
self.mouse = None
|
||||
156
skyvern/services/browser_recording/state_machines/input_text.py
Normal file
156
skyvern/services/browser_recording/state_machines/input_text.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import typing as t
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ActionInputText,
|
||||
ActionKind,
|
||||
ActionTarget,
|
||||
EventTarget,
|
||||
ExfiltratedEvent,
|
||||
Mouse,
|
||||
MousePosition,
|
||||
target_has_changed,
|
||||
)
|
||||
|
||||
from .state_machine import StateMachine
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class StateMachineInputText(StateMachine):
|
||||
state: t.Literal["focus", "keydown", "blur"] = "focus"
|
||||
target: EventTarget | None = None
|
||||
timestamp_start: float | None = None
|
||||
mouse: MousePosition | None = None
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.reset()
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionInputText | None:
|
||||
if event.source != "console":
|
||||
return None
|
||||
|
||||
match self.state:
|
||||
case "focus":
|
||||
if event.params.type != "focus":
|
||||
if event.params.mousePosition:
|
||||
if event.params.mousePosition.xp is not None and event.params.mousePosition.yp is not None:
|
||||
self.mouse = event.params.mousePosition
|
||||
|
||||
return None
|
||||
|
||||
LOG.debug(f"~ focus detected [{event.params.target.skyId or event.params.target.id}]")
|
||||
|
||||
if event.params.type == "focus":
|
||||
self.target = event.params.target
|
||||
self.state = "keydown"
|
||||
self.timestamp_start = event.params.timestamp
|
||||
|
||||
case "keydown":
|
||||
if event.params.type != "keydown":
|
||||
return None
|
||||
|
||||
if self.update_target(event.params.target):
|
||||
return None
|
||||
|
||||
LOG.debug(f"~ initial keydown detected '{event.params.key}'")
|
||||
|
||||
self.state = "blur"
|
||||
case "blur":
|
||||
if event.params.type == "keydown":
|
||||
LOG.debug(f"~ input-text: subsequent keydown detected '{event.params.key}'")
|
||||
|
||||
if event.params.key == "Enter":
|
||||
return self.emit(event)
|
||||
|
||||
return None
|
||||
|
||||
if event.params.type != "blur":
|
||||
return None
|
||||
|
||||
if self.update_target(event.params.target):
|
||||
return None
|
||||
|
||||
LOG.debug("~ blur detected")
|
||||
|
||||
return self.emit(event)
|
||||
|
||||
return None
|
||||
|
||||
def update_target(self, event_target: EventTarget) -> bool:
|
||||
if target_has_changed(self.target, event_target):
|
||||
self.target = event_target
|
||||
|
||||
LOG.debug("~ input-text target changed, resetting state machine")
|
||||
|
||||
self.reset()
|
||||
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def emit(self, event: ExfiltratedEvent) -> ActionInputText | None:
|
||||
if not self.target:
|
||||
LOG.debug("~ cannot emit, missing target or mouse; resetting")
|
||||
|
||||
self.reset()
|
||||
return None
|
||||
|
||||
xp = (self.mouse.xp or -1) if self.mouse else None
|
||||
yp = (self.mouse.yp or -1) if self.mouse else None
|
||||
|
||||
LOG.debug("~ emitting input text action", exfiltrated_event=event)
|
||||
|
||||
input_value = event.params.target.value
|
||||
|
||||
if input_value is None:
|
||||
LOG.debug("~ cannot emit, missing input value; resetting")
|
||||
|
||||
self.reset()
|
||||
return None
|
||||
|
||||
action_target = ActionTarget(
|
||||
class_name=self.target.className,
|
||||
id=self.target.id,
|
||||
mouse=Mouse(xp=xp, yp=yp),
|
||||
sky_id=self.target.skyId,
|
||||
tag_name=self.target.tagName,
|
||||
texts=self.target.text,
|
||||
)
|
||||
|
||||
action = ActionInputText(
|
||||
kind=ActionKind.INPUT_TEXT.value,
|
||||
target=action_target,
|
||||
timestamp_start=self.timestamp_start,
|
||||
timestamp_end=event.params.timestamp,
|
||||
url=event.params.url,
|
||||
input_value=str(input_value),
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
return action
|
||||
|
||||
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||
if action.kind == ActionKind.CLICK:
|
||||
# NOTE(jdo): skipping self.reset here; a focus event on an element can often be followed by a
|
||||
# click event, and the identity doesn't always match due to nesting. I think a more precise
|
||||
# check would be to:
|
||||
# - check identity match for click and focus; if not matching:
|
||||
# - ask the browser via cdp if there is a nesting relation between the two elements
|
||||
# - if yes, allow and carry on, otherwise reset
|
||||
# That's another round trip. It's likely pretty fast, tho. For now, we'll just assume a click does
|
||||
# not invalidate the state of the input text state machine.
|
||||
return True
|
||||
|
||||
self.reset()
|
||||
|
||||
return True
|
||||
|
||||
def reset(self) -> None:
|
||||
self.state = "focus"
|
||||
self.target = None
|
||||
self.mouse = None
|
||||
self.timestamp_start = None
|
||||
@@ -0,0 +1,30 @@
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ExfiltratedEvent,
|
||||
)
|
||||
|
||||
|
||||
class StateMachine:
|
||||
"""
|
||||
A minimal, concrete StateMachineProtocol.
|
||||
"""
|
||||
|
||||
state: str
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> Action | None:
|
||||
return None
|
||||
|
||||
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||
"""
|
||||
Optional callback when an action is emitted by _any_ state machine.
|
||||
Default is that a state machine resets.
|
||||
|
||||
Return `True` (the default) to allow the action to proceed; return `False`
|
||||
to veto the action.
|
||||
"""
|
||||
self.reset()
|
||||
|
||||
return True
|
||||
|
||||
def reset(self) -> None:
|
||||
pass
|
||||
@@ -0,0 +1,91 @@
|
||||
import typing as t
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ActionKind,
|
||||
ActionTarget,
|
||||
ActionUrlChange,
|
||||
ExfiltratedEvent,
|
||||
Mouse,
|
||||
)
|
||||
|
||||
from .state_machine import StateMachine
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class StateMachineUrlChange(StateMachine):
|
||||
state: t.Literal["void"] = "void"
|
||||
last_url: str | None = None
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.reset()
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionUrlChange | None:
|
||||
if event.source != "cdp":
|
||||
return None
|
||||
|
||||
if not event.event_name.startswith("nav:"):
|
||||
return None
|
||||
|
||||
if event.event_name == "nav:frame_started_navigating":
|
||||
url = event.params.url
|
||||
|
||||
if url == self.last_url:
|
||||
LOG.debug("~ ignoring navigation to same URL", url=url)
|
||||
return None
|
||||
else:
|
||||
if event.params.frame:
|
||||
self.last_url = event.params.frame.url
|
||||
elif event.params.url:
|
||||
self.last_url = event.params.url
|
||||
|
||||
return None
|
||||
|
||||
if not url:
|
||||
return None
|
||||
|
||||
self.last_url = url
|
||||
|
||||
LOG.debug("~ emitting URL change action", url=url)
|
||||
|
||||
action_target = ActionTarget(
|
||||
class_name=None,
|
||||
id=None,
|
||||
mouse=Mouse(xp=None, yp=None),
|
||||
sky_id=None,
|
||||
tag_name=None,
|
||||
texts=[],
|
||||
)
|
||||
|
||||
return ActionUrlChange(
|
||||
kind=ActionKind.URL_CHANGE.value,
|
||||
target=action_target,
|
||||
timestamp_start=event.timestamp,
|
||||
timestamp_end=event.timestamp,
|
||||
url=url,
|
||||
)
|
||||
|
||||
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||
if action.kind != ActionKind.URL_CHANGE:
|
||||
return True
|
||||
|
||||
if not current_actions:
|
||||
return True
|
||||
|
||||
last_action = current_actions[-1]
|
||||
|
||||
if last_action.kind != ActionKind.URL_CHANGE:
|
||||
return True
|
||||
|
||||
if last_action.url == action.url:
|
||||
LOG.debug("~ vetoing duplicate URL change action", url=action.url)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def reset(self) -> None:
|
||||
self.state = "void"
|
||||
self.last_url = None
|
||||
78
skyvern/services/browser_recording/state_machines/wait.py
Normal file
78
skyvern/services/browser_recording/state_machines/wait.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import typing as t
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.services.browser_recording.types import (
|
||||
Action,
|
||||
ActionKind,
|
||||
ActionTarget,
|
||||
ActionWait,
|
||||
ExfiltratedEvent,
|
||||
Mouse,
|
||||
)
|
||||
|
||||
from .state_machine import StateMachine
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class StateMachineWait(StateMachine):
|
||||
state: t.Literal["void"] = "void"
|
||||
last_event_timestamp: float | None = None
|
||||
threshold_ms: int = ActionWait.MIN_DURATION_THRESHOLD_MS
|
||||
|
||||
def __init__(self, threshold_ms: int | None = None) -> None:
|
||||
self.threshold_ms = max(
|
||||
threshold_ms or ActionWait.MIN_DURATION_THRESHOLD_MS,
|
||||
ActionWait.MIN_DURATION_THRESHOLD_MS,
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> ActionWait | None:
|
||||
if event.source != "console":
|
||||
return None
|
||||
|
||||
if self.last_event_timestamp is not None:
|
||||
duration_ms = int(event.params.timestamp - self.last_event_timestamp)
|
||||
|
||||
if duration_ms >= self.threshold_ms:
|
||||
LOG.debug("~ emitting wait action", duration_ms=duration_ms)
|
||||
|
||||
action_target = ActionTarget(
|
||||
class_name=None,
|
||||
id=None,
|
||||
mouse=Mouse(xp=None, yp=None),
|
||||
sky_id=None,
|
||||
tag_name=None,
|
||||
texts=[],
|
||||
)
|
||||
|
||||
action = ActionWait(
|
||||
kind=ActionKind.WAIT.value,
|
||||
target=action_target,
|
||||
timestamp_start=self.last_event_timestamp,
|
||||
timestamp_end=event.params.timestamp,
|
||||
url=event.params.url,
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
return action
|
||||
|
||||
self.last_event_timestamp = event.params.timestamp
|
||||
|
||||
return None
|
||||
|
||||
def on_action(self, action: Action, current_actions: list[Action]) -> bool:
|
||||
if action.kind == ActionKind.HOVER:
|
||||
return True
|
||||
|
||||
self.reset()
|
||||
|
||||
return True
|
||||
|
||||
def reset(self) -> None:
|
||||
self.state = "void"
|
||||
self.last_event_timestamp = None
|
||||
243
skyvern/services/browser_recording/types.py
Normal file
243
skyvern/services/browser_recording/types.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
A types module for browser recording actions and events.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import typing as t
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from skyvern.client.types.workflow_definition_yaml_blocks_item import (
|
||||
WorkflowDefinitionYamlBlocksItem_Action,
|
||||
WorkflowDefinitionYamlBlocksItem_GotoUrl,
|
||||
WorkflowDefinitionYamlBlocksItem_Wait,
|
||||
)
|
||||
|
||||
|
||||
class ActionKind(enum.StrEnum):
|
||||
CLICK = "click"
|
||||
HOVER = "hover"
|
||||
INPUT_TEXT = "input_text"
|
||||
URL_CHANGE = "url_change"
|
||||
WAIT = "wait"
|
||||
|
||||
|
||||
class ActionBase(BaseModel):
|
||||
kind: ActionKind
|
||||
# --
|
||||
target: "ActionTarget"
|
||||
timestamp_start: float
|
||||
timestamp_end: float
|
||||
url: str
|
||||
|
||||
|
||||
class ActionClick(ActionBase):
|
||||
kind: t.Literal[ActionKind.CLICK]
|
||||
|
||||
|
||||
class ActionHover(ActionBase):
|
||||
kind: t.Literal[ActionKind.HOVER]
|
||||
# --
|
||||
DURATION_THRESHOLD_MS: t.ClassVar[int] = 2000
|
||||
MIN_DURATION_THRESHOLD_MS: t.ClassVar[int] = 1000
|
||||
|
||||
|
||||
class ActionInputText(ActionBase):
|
||||
kind: t.Literal[ActionKind.INPUT_TEXT]
|
||||
# --
|
||||
input_value: str
|
||||
|
||||
|
||||
class ActionUrlChange(ActionBase):
|
||||
kind: t.Literal[ActionKind.URL_CHANGE]
|
||||
|
||||
|
||||
class ActionWait(ActionBase):
|
||||
kind: t.Literal[ActionKind.WAIT]
|
||||
# --
|
||||
duration_ms: int
|
||||
MIN_DURATION_THRESHOLD_MS: t.ClassVar[int] = 5000
|
||||
|
||||
|
||||
Action = ActionClick | ActionHover | ActionInputText | ActionUrlChange | ActionWait
|
||||
|
||||
ActionBlockable = ActionClick | ActionHover | ActionInputText
|
||||
|
||||
|
||||
class ActionTarget(BaseModel):
|
||||
class_name: str | None = None
|
||||
id: str | None = None
|
||||
mouse: "Mouse"
|
||||
sky_id: str | None = None
|
||||
tag_name: str | None = None
|
||||
texts: list[str] = []
|
||||
|
||||
|
||||
class Mouse(BaseModel):
|
||||
xp: float | None = None
|
||||
"""
|
||||
0 to 1.0 inclusive, percentage across the viewport
|
||||
"""
|
||||
yp: float | None = None
|
||||
"""
|
||||
0 to 1.0 inclusive, percentage down the viewport
|
||||
"""
|
||||
|
||||
|
||||
OutputBlock = t.Union[
|
||||
WorkflowDefinitionYamlBlocksItem_Action,
|
||||
WorkflowDefinitionYamlBlocksItem_GotoUrl,
|
||||
WorkflowDefinitionYamlBlocksItem_Wait,
|
||||
]
|
||||
|
||||
|
||||
class TargetInfo(BaseModel):
|
||||
attached: bool | None = None
|
||||
browserContextId: str | None = None
|
||||
canAccessOpener: bool | None = None
|
||||
targetId: str | None = None
|
||||
title: str | None = None
|
||||
type: str | None = None
|
||||
url: str | None = None
|
||||
|
||||
|
||||
class CdpEventFrame(BaseModel):
|
||||
url: str | None = None
|
||||
|
||||
class Config:
|
||||
extra = "allow"
|
||||
|
||||
|
||||
class ExfiltratedEventCdpParams(BaseModel):
|
||||
# target_info_changed events
|
||||
targetInfo: TargetInfo | None = None
|
||||
|
||||
# frame_requested_navigation events
|
||||
disposition: str | None = None
|
||||
frameId: str | None = None
|
||||
reason: str | None = None
|
||||
url: str | None = None
|
||||
|
||||
# frame_navigated events
|
||||
frame: CdpEventFrame | None = None
|
||||
|
||||
|
||||
class EventTarget(BaseModel):
|
||||
className: str | None = None
|
||||
id: str | None = None
|
||||
isHtml: bool = False
|
||||
isSvg: bool = False
|
||||
innerText: str | None = None
|
||||
skyId: str | None = None
|
||||
tagName: str | None = None
|
||||
text: list[str] = []
|
||||
value: str | int | None = None
|
||||
|
||||
|
||||
class MousePosition(BaseModel):
|
||||
xa: float | None = None
|
||||
ya: float | None = None
|
||||
xp: float | None = None
|
||||
yp: float | None = None
|
||||
|
||||
|
||||
class BoundingRect(BaseModel):
|
||||
bottom: float
|
||||
height: float
|
||||
left: float
|
||||
right: float
|
||||
top: float
|
||||
width: float
|
||||
x: float
|
||||
y: float
|
||||
|
||||
|
||||
class Scroll(BaseModel):
|
||||
clientHeight: float
|
||||
clientWidth: float
|
||||
scrollHeight: float
|
||||
scrollLeft: float
|
||||
scrollTop: float
|
||||
scrollWidth: float
|
||||
|
||||
|
||||
class ActiveElement(BaseModel):
|
||||
boundingRect: BoundingRect | None = None
|
||||
className: str | None = None
|
||||
id: str | None = None
|
||||
scroll: Scroll | None = None
|
||||
tagName: str | None = None
|
||||
|
||||
|
||||
class Window(BaseModel):
|
||||
height: float
|
||||
scrollX: float
|
||||
scrollY: float
|
||||
width: float
|
||||
|
||||
|
||||
class ExfiltratedEventConsoleParams(BaseModel):
|
||||
activeElement: ActiveElement
|
||||
code: str | None = None
|
||||
inputValue: str | None = None
|
||||
key: str | None = None
|
||||
mousePosition: MousePosition
|
||||
target: EventTarget
|
||||
timestamp: float
|
||||
type: str
|
||||
url: str
|
||||
window: Window
|
||||
|
||||
|
||||
class ExfiltratedCdpEvent(BaseModel):
|
||||
kind: Literal["exfiltrated-event"]
|
||||
event_name: str
|
||||
params: ExfiltratedEventCdpParams
|
||||
source: Literal["cdp"]
|
||||
timestamp: float
|
||||
|
||||
|
||||
class ExfiltratedConsoleEvent(BaseModel):
|
||||
kind: Literal["exfiltrated-event"]
|
||||
event_name: str
|
||||
params: ExfiltratedEventConsoleParams
|
||||
source: Literal["console"]
|
||||
timestamp: float
|
||||
|
||||
|
||||
ExfiltratedEvent = ExfiltratedCdpEvent | ExfiltratedConsoleEvent
|
||||
|
||||
|
||||
class StateMachineProtocol(t.Protocol):
|
||||
state: str
|
||||
|
||||
def tick(self, event: ExfiltratedEvent, current_actions: list[Action]) -> Action | None: ...
|
||||
|
||||
def on_action(self, action: Action, current_actions: list[Action]) -> bool: ...
|
||||
|
||||
def reset(self) -> None: ...
|
||||
|
||||
|
||||
# -- guards, predicates, etc.
|
||||
|
||||
|
||||
def target_has_changed(current_target: EventTarget | None, event_target: EventTarget) -> bool:
|
||||
if not current_target:
|
||||
return False
|
||||
|
||||
if not event_target.skyId and not event_target.id:
|
||||
return True # sic: we cannot compare, so assume changed
|
||||
|
||||
if not current_target.skyId and not current_target.id:
|
||||
return True # sic: we cannot compare, so assume changed
|
||||
|
||||
if current_target.id and event_target.id:
|
||||
if current_target.id != event_target.id:
|
||||
return True
|
||||
|
||||
if current_target.skyId and event_target.skyId:
|
||||
if current_target.skyId != event_target.skyId:
|
||||
return True
|
||||
|
||||
return False
|
||||
Reference in New Issue
Block a user