support aws s3 storage (#720)

This commit is contained in:
Shuchang Zheng
2024-08-29 15:01:13 -07:00
committed by GitHub
parent 11811e74bf
commit ffc4b35032
4 changed files with 60 additions and 0 deletions

View File

@@ -44,3 +44,13 @@ The following endpoint can be used to retrieve artifacts for a specific step:
``` ```
GET https://api.skyvern.com/api/v1/tasks/{task_id}/steps/{step_id}/artifacts GET https://api.skyvern.com/api/v1/tasks/{task_id}/steps/{step_id}/artifacts
``` ```
## Artifacts configurations
By default, Skyvern stores artifacts — including video recordings, screenshots, LLM requests and responses, HTML, and Skyvern-parsed HTML elements — locally in the `/artifacts` folder under the Skyvern repository.
You can also have Skyvern upload artifacts to your S3 buckets. To do this, first set these environment variables:
- `AWS_DEFAULT_REGION`: `us-east-1`, `us-west-1`, ...
- `AWS_ACCESS_KEY_ID`
- `AWS_SECRET_ACCESS_KEY`
- `SKYVERN_STORAGE_TYPE`: set it to be `s3`. The default is `local`
Make sure these S3 buckets are created: `skyvern-artifacts`, `skyvern-screenshots`. These are the default bucket names Skyvern uses. To customize the bucket names, change these two environment variables: `AWS_S3_BUCKET_ARTIFACTS` and `AWS_S3_BUCKET_SCREENSHOTS`.

View File

@@ -45,6 +45,11 @@ class Settings(BaseSettings):
# Artifact storage settings # Artifact storage settings
ARTIFACT_STORAGE_PATH: str = f"{SKYVERN_DIR}/artifacts" ARTIFACT_STORAGE_PATH: str = f"{SKYVERN_DIR}/artifacts"
GENERATE_PRESIGNED_URLS: bool = False GENERATE_PRESIGNED_URLS: bool = False
AWS_S3_BUCKET_ARTIFACTS: str = "skyvern-artifacts"
AWS_S3_BUCKET_SCREENSHOTS: str = "skyvern-screenshots"
# Supported storage types: local, s3
SKYVERN_STORAGE_TYPE: str = "local"
# S3 bucket settings # S3 bucket settings
AWS_REGION: str = "us-east-1" AWS_REGION: str = "us-east-1"

View File

@@ -8,6 +8,7 @@ from skyvern.forge.agent_functions import AgentFunction
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
from skyvern.forge.sdk.artifact.manager import ArtifactManager from skyvern.forge.sdk.artifact.manager import ArtifactManager
from skyvern.forge.sdk.artifact.storage.factory import StorageFactory from skyvern.forge.sdk.artifact.storage.factory import StorageFactory
from skyvern.forge.sdk.artifact.storage.s3 import S3Storage
from skyvern.forge.sdk.cache.factory import CacheFactory from skyvern.forge.sdk.cache.factory import CacheFactory
from skyvern.forge.sdk.db.client import AgentDB from skyvern.forge.sdk.db.client import AgentDB
from skyvern.forge.sdk.experimentation.providers import BaseExperimentationProvider, NoOpExperimentationProvider from skyvern.forge.sdk.experimentation.providers import BaseExperimentationProvider, NoOpExperimentationProvider
@@ -22,6 +23,8 @@ DATABASE = AgentDB(
SettingsManager.get_settings().DATABASE_STRING, SettingsManager.get_settings().DATABASE_STRING,
debug_enabled=SettingsManager.get_settings().DEBUG_MODE, debug_enabled=SettingsManager.get_settings().DEBUG_MODE,
) )
if SettingsManager.get_settings().SKYVERN_STORAGE_TYPE == "s3":
StorageFactory.set_storage(S3Storage())
STORAGE = StorageFactory.get_storage() STORAGE = StorageFactory.get_storage()
CACHE = CacheFactory.get_cache() CACHE = CacheFactory.get_cache()
ARTIFACT_MANAGER = ArtifactManager() ARTIFACT_MANAGER = ArtifactManager()

View File

@@ -0,0 +1,42 @@
from datetime import datetime, timezone

from skyvern.config import settings
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
class S3Storage(BaseStorage):
    """Artifact storage backend that persists Skyvern artifacts to AWS S3.

    Artifacts are written under ``s3://<bucket>/<env>/...`` via the shared
    async AWS client; streaming screenshots go to a separate bucket
    (``settings.AWS_S3_BUCKET_SCREENSHOTS``).
    """

    def __init__(self, bucket: str | None = None) -> None:
        """Create the storage client.

        Args:
            bucket: S3 bucket name for artifacts. Defaults to
                ``settings.AWS_S3_BUCKET_ARTIFACTS`` when not provided.
        """
        self.async_client = AsyncAWSClient()
        self.bucket = bucket or settings.AWS_S3_BUCKET_ARTIFACTS

    def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        """Build the s3:// URI under which an artifact for *step* is stored.

        The key encodes environment, task, step (order + retry), a UTC
        timestamp, the artifact id, type, and a type-derived file extension.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        # datetime.utcnow() is deprecated (Python 3.12+). Use an aware UTC
        # time and drop tzinfo so isoformat() output (no "+00:00" suffix)
        # stays byte-identical to the previous naive-utcnow behavior.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        return (
            f"s3://{self.bucket}/{settings.ENV}/{step.task_id}/"
            f"{step.order:02d}_{step.retry_index}_{step.step_id}/"
            f"{timestamp}_{artifact_id}_{artifact_type}.{file_ext}"
        )

    async def store_artifact(self, artifact: Artifact, data: bytes) -> None:
        """Upload raw artifact bytes to the URI recorded on *artifact*."""
        await self.async_client.upload_file(artifact.uri, data)

    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Download and return the artifact's bytes, or None if unavailable."""
        return await self.async_client.download_file(artifact.uri)

    async def get_share_link(self, artifact: Artifact) -> str | None:
        """Return a presigned URL for *artifact*, or None if none was created."""
        share_urls = await self.async_client.create_presigned_urls([artifact.uri])
        return share_urls[0] if share_urls else None

    async def get_share_links(self, artifacts: list[Artifact]) -> list[str] | None:
        """Return presigned URLs for every artifact, in input order."""
        return await self.async_client.create_presigned_urls([artifact.uri for artifact in artifacts])

    async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
        """Upload a local file at *path* to the URI recorded on *artifact*."""
        await self.async_client.upload_file_from_path(artifact.uri, path)

    async def save_streaming_file(self, organization_id: str, file_name: str) -> None:
        """Upload a local streaming file to the screenshots bucket."""
        from_path = f"{settings.STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}"
        to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
        await self.async_client.upload_file_from_path(to_path, from_path)

    async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None:
        """Download a streaming file from the screenshots bucket.

        NOTE(review): *use_default* is unused here; presumably kept for
        BaseStorage interface compatibility — confirm against the base class.
        """
        path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
        return await self.async_client.download_file(path, log_exception=False)