diff --git a/maxun-core/jest.config.js b/maxun-core/jest.config.js new file mode 100644 index 00000000..e4e978ec --- /dev/null +++ b/maxun-core/jest.config.js @@ -0,0 +1,10 @@ +/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', + globals: { + 'ts-jest': { + isolatedModules: true + } + } +}; \ No newline at end of file diff --git a/maxun-core/package.json b/maxun-core/package.json new file mode 100644 index 00000000..2fa29f22 --- /dev/null +++ b/maxun-core/package.json @@ -0,0 +1,29 @@ +{ + "name": "maxun-core", + "version": "0.0.1", + "description": "Smart Workflow interpreter", + "main": "build/index.js", + "typings": "build/index.d.ts", + "scripts": { + "test": "jest", + "build": "npm run clean && tsc", + "lint": "eslint .", + "clean": "rimraf ./build" + }, + "files": [ + "build/*" + ], + "keywords": [ + "web", + "automation", + "workflow", + "interpret", + "scraping" + ], + "author": "Karishma Shukla", + "license": "MIT", + "dependencies": { + "joi": "^17.6.0", + "playwright": "^1.20.1" + } +} diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js new file mode 100644 index 00000000..262e63ec --- /dev/null +++ b/maxun-core/src/browserSide/scraper.js @@ -0,0 +1,253 @@ +/* eslint-disable @typescript-eslint/no-unused-vars */ + +const area = (element) => element.offsetHeight * element.offsetWidth; + +function getBiggestElement(selector) { + const elements = Array.from(document.querySelectorAll(selector)); + const biggest = elements.reduce( + (max, elem) => ( + area(elem) > area(max) ? elem : max), + { offsetHeight: 0, offsetWidth: 0 }, + ); + return biggest; +} + +/** + * Generates structural selector (describing element by its DOM tree location). + * + * **The generated selector is not guaranteed to be unique!** (In fact, this is + * the desired behaviour in here.) + * @param {HTMLElement} element Element being described. 
+ * @returns {string} CSS-compliant selector describing the element's location in the DOM tree. + */ +function GetSelectorStructural(element) { + // Base conditions for the recursive approach. + if (element.tagName === 'BODY') { + return 'BODY'; + } + const selector = element.tagName; + if (element.parentElement) { + return `${GetSelectorStructural(element.parentElement)} > ${selector}`; + } + + return selector; +} + +/** + * Heuristic method to find collections of "interesting" items on the page. + * @returns {Array} A collection of interesting DOM nodes + * (online store products, plane tickets, list items... and many more?) + */ +function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') { + const restoreScroll = (() => { + const { scrollX, scrollY } = window; + return () => { + window.scrollTo(scrollX, scrollY); + }; + })(); + + /** +* @typedef {Array<{x: number, y: number}>} Grid +*/ + + /** + * Returns an array of grid-aligned {x,y} points. + * @param {number} [granularity=0.005] sets the number of generated points + * (the higher the granularity, the more points). + * @returns {Grid} Array of {x, y} objects. 
+ */ + function getGrid(startX = 0, startY = 0, granularity = 0.005) { + const width = window.innerWidth; + const height = window.innerHeight; + + const out = []; + for (let x = 0; x < width; x += 1 / granularity) { + for (let y = 0; y < height; y += 1 / granularity) { + out.push({ x: startX + x, y: startY + y }); + } + } + return out; + } + + let maxSelector = { selector: 'body', metric: 0 }; + + const updateMaximumWithPoint = (point) => { + const currentElement = document.elementFromPoint(point.x, point.y); + const selector = GetSelectorStructural(currentElement); + + const elements = Array.from(document.querySelectorAll(selector)) + .filter((element) => area(element) > minArea); + + // If the current selector targets less than three elements, + // we consider it not interesting (would be a very underwhelming scraper) + if (elements.length < 3) { + return; + } + + let metric = null; + + if (metricType === 'total_area') { + metric = elements + .reduce((p, x) => p + area(x), 0); + } else if (metricType === 'size_deviation') { + // This could use a proper "statistics" approach... but meh, so far so good! + const sizes = elements + .map((element) => area(element)); + + metric = (1 - (Math.max(...sizes) - Math.min(...sizes)) / Math.max(...sizes)); + } + + // console.debug(`Total ${metricType} is ${metric}.`) + if (metric > maxSelector.metric && elements.length < maxCountPerPage) { + maxSelector = { selector, metric }; + } + }; + + for (let scroll = 0; scroll < scrolls; scroll += 1) { + window.scrollTo(0, scroll * window.innerHeight); + + const grid = getGrid(); + + grid.forEach(updateMaximumWithPoint); + } + + restoreScroll(); + + let out = Array.from(document.querySelectorAll(maxSelector.selector)); + + const different = (x, i, a) => a.findIndex((e) => e === x) === i; + // as long as we don't merge any two elements by substituing them for their parents, + // we substitute. 
+ while (out.map((x) => x.parentElement).every(different) + && out.forEach((x) => x.parentElement !== null)) { + out = out.map((x) => x.parentElement ?? x); + } + + return out; +} + +/** + * Returns a "scrape" result from the current page. + * @returns {Array} *Curated* array of scraped information (with sparse rows removed) + */ +// Wrap the entire function in an IIFE (Immediately Invoked Function Expression) +// and attach it to the window object +(function (window) { + /** + * Returns a "scrape" result from the current page. + * @returns {Array} *Curated* array of scraped information (with sparse rows removed) + */ + window.scrape = function (selector = null) { + /** + * **crudeRecords** contains uncurated rundowns of "scrapable" elements + * @type {Array} + */ + const crudeRecords = (selector + ? Array.from(document.querySelectorAll(selector)) + : scrapableHeuristics()) + .map((record) => ({ + ...Array.from(record.querySelectorAll('img')) + .reduce((p, x, i) => { + let url = null; + if (x.srcset) { + const urls = x.srcset.split(', '); + [url] = urls[urls.length - 1].split(' '); + } + + /** + * Contains the largest elements from `srcset` - if `srcset` is not present, contains + * URL from the `src` attribute + * + * If the `src` attribute contains a data url, imgUrl contains `undefined`. + */ + let imgUrl; + if (x.srcset) { + imgUrl = url; + } else if (x.src.indexOf('data:') === -1) { + imgUrl = x.src; + } + + return ({ + ...p, + ...(imgUrl ? { [`img_${i}`]: imgUrl } : {}), + }); + }, {}), + ...record.innerText.split('\n') + .reduce((p, x, i) => ({ + ...p, + [`record_${String(i).padStart(4, '0')}`]: x.trim(), + }), {}), + })); + + return crudeRecords; + }; + + /** + * Given an object with named lists of elements, + * groups the elements by their distance in the DOM tree. + * @param {Object.} lists The named lists of HTML elements. 
+ * @returns {Array.>} + */ + window.scrapeSchema = function (lists) { + function omap(object, f, kf = (x) => x) { + return Object.fromEntries( + Object.entries(object) + .map(([k, v]) => [kf(k), f(v)]), + ); + } + + function ofilter(object, f) { + return Object.fromEntries( + Object.entries(object) + .filter(([k, v]) => f(k, v)), + ); + } + + function getSeedKey(listObj) { + const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length))); + return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0]; + } + + function getMBEs(elements) { + return elements.map((element) => { + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => e.parentNode?.contains(elem)) + .length === 1; + + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } + + return candidate; + }); + } + + const seedName = getSeedKey(lists); + const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector)); + const MBEs = getMBEs(seedElements); + + return MBEs.map((mbe) => omap( + lists, + ({ selector, attribute }, key) => { + const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); + if (!elem) return undefined; + + switch (attribute) { + case 'href': + return elem.getAttribute('href'); + case 'src': + return elem.getAttribute('src'); + case 'innerText': + return elem.innerText; + case 'textContent': + return elem.textContent; + default: + return elem.innerText; + } + }, + (key) => key // Use the original key in the output + )); + } + +})(window); \ No newline at end of file diff --git a/maxun-core/src/index.ts b/maxun-core/src/index.ts new file mode 100644 index 00000000..571f5781 --- /dev/null +++ b/maxun-core/src/index.ts @@ -0,0 +1,8 @@ +import Interpreter from './interpret'; + +export default Interpreter; +export { default as Preprocessor } from './preprocessor'; +export type { 
+ WorkflowFile, WhereWhatPair, Where, What, +} from './types/workflow'; +export { unaryOperators, naryOperators, meta as metaOperators } from './types/logic'; \ No newline at end of file diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts new file mode 100644 index 00000000..4860d2fd --- /dev/null +++ b/maxun-core/src/interpret.ts @@ -0,0 +1,457 @@ +/* eslint-disable no-await-in-loop, no-restricted-syntax */ +import { Page, PageScreenshotOptions } from 'playwright'; +import path from 'path'; + +import { EventEmitter } from 'events'; +import { + Where, What, PageState, Workflow, WorkflowFile, + ParamType, SelectorArray, CustomFunctions, +} from './types/workflow'; + +import { operators, meta } from './types/logic'; +import { arrayToObject } from './utils/utils'; +import Concurrency from './utils/concurrency'; +import Preprocessor from './preprocessor'; +import log, { Level } from './utils/logger'; + +/** + * Defines optional intepreter options (passed in constructor) + */ +interface InterpreterOptions { + maxRepeats: number; + maxConcurrency: number; + serializableCallback: (output: any) => (void | Promise); + binaryCallback: (output: any, mimeType: string) => (void | Promise); + debug: boolean; + debugChannel: Partial<{ + activeId: Function, + debugMessage: Function, + }> +} + +/** + * Class for running the Smart Workflows. 
+ */ +export default class Interpreter extends EventEmitter { + private workflow: Workflow; + + private initializedWorkflow: Workflow | null; + + private options: InterpreterOptions; + + private concurrency: Concurrency; + + private stopper: Function | null = null; + + private log: typeof log; + + constructor(workflow: WorkflowFile, options?: Partial) { + super(); + this.workflow = workflow.workflow; + this.initializedWorkflow = null; + this.options = { + maxRepeats: 5, + maxConcurrency: 5, + serializableCallback: (data) => { log(JSON.stringify(data), Level.WARN); }, + binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); }, + debug: false, + debugChannel: {}, + ...options, + }; + this.concurrency = new Concurrency(this.options.maxConcurrency); + this.log = (...args) => log(...args); + + const error = Preprocessor.validateWorkflow(workflow); + if (error) { + throw (error); + } + + if (this.options.debugChannel?.debugMessage) { + const oldLog = this.log; + // @ts-ignore + this.log = (...args: Parameters) => { + if (args[1] !== Level.LOG) { + this.options.debugChannel.debugMessage!(typeof args[0] === 'string' ? args[0] : args[0].message); + } + oldLog(...args); + }; + } + } + + /** + * Returns the context object from given Page and the current workflow.\ + * \ + * `workflow` is used for selector extraction - function searches for used selectors to + * look for later in the page's context. + * @param page Playwright Page object + * @param workflow Current **initialized** workflow (array of where-what pairs). + * @returns {PageState} State of the current page. + */ + private async getState(page: Page, workflow: Workflow): Promise { + /** + * All the selectors present in the current Workflow + */ + const selectors = Preprocessor.extractSelectors(workflow); + + /** + * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability). 
+ * @param selector Selector to be queried + * @returns True if the targetted element is actionable, false otherwise. + */ + const actionable = async (selector: string): Promise => { + try { + const proms = [ + page.isEnabled(selector, { timeout: 500 }), + page.isVisible(selector, { timeout: 500 }), + ]; + + return await Promise.all(proms).then((bools) => bools.every((x) => x)); + } catch (e) { + // log(e, Level.ERROR); + return false; + } + }; + + /** + * Object of selectors present in the current page. + */ + const presentSelectors: SelectorArray = await Promise.all( + selectors.map(async (selector) => { + if (await actionable(selector)) { + return [selector]; + } + return []; + }), + ).then((x) => x.flat()); + + return { + url: page.url(), + cookies: (await page.context().cookies([page.url()])) + .reduce((p, cookie) => ( + { + ...p, + [cookie.name]: cookie.value, + }), {}), + selectors: presentSelectors, + }; + } + + /** + * Tests if the given action is applicable with the given context. + * @param where Tested *where* condition + * @param context Current browser context. + * @returns True if `where` is applicable in the given context, false otherwise + */ + private applicable(where: Where, context: PageState, usedActions: string[] = []): boolean { + /** + * Given two arbitrary objects, determines whether `subset` is a subset of `superset`.\ + * \ + * For every key in `subset`, there must be a corresponding key with equal scalar + * value in `superset`, or `inclusive(subset[key], superset[key])` must hold. + * @param subset Arbitrary non-cyclic JS object (where clause) + * @param superset Arbitrary non-cyclic JS object (browser context) + * @returns `true` if `subset <= superset`, `false` otherwise. + */ + const inclusive = (subset: Record, superset: Record) + : boolean => ( + Object.entries(subset).every( + ([key, value]) => { + /** + * Arrays are compared without order (are transformed into objects before comparison). 
+ */ + const parsedValue = Array.isArray(value) ? arrayToObject(value) : value; + + const parsedSuperset: Record = {}; + parsedSuperset[key] = Array.isArray(superset[key]) + ? arrayToObject(superset[key]) + : superset[key]; + + // Every `subset` key must exist in the `superset` and + // have the same value (strict equality), or subset[key] <= superset[key] + return parsedSuperset[key] + && ( + (parsedSuperset[key] === parsedValue) + || ((parsedValue).constructor.name === 'RegExp' && (parsedValue).test(parsedSuperset[key])) + || ( + (parsedValue).constructor.name !== 'RegExp' + && typeof parsedValue === 'object' && inclusive(parsedValue, parsedSuperset[key]) + ) + ); + }, + ) + ); + + // Every value in the "where" object should be compliant to the current state. + return Object.entries(where).every( + ([key, value]) => { + if (operators.includes(key)) { + const array = Array.isArray(value) + ? value as Where[] + : Object.entries(value).map((a) => Object.fromEntries([a])); + // every condition is treated as a single context (sharing the same `usedActions` history, so nested $before/$after keep working) + + switch (key as keyof typeof operators) { + case '$and': + return array?.every((x) => this.applicable(x, context, usedActions)); + case '$or': + return array?.some((x) => this.applicable(x, context, usedActions)); + case '$not': + return !this.applicable(value, context, usedActions); // $not should be a unary operator + default: + throw new Error('Undefined logic operator.'); + } + } else if (meta.includes(key)) { + const testRegexString = (x: string) => { + if (typeof value === 'string') { + return x === value; + } + + return (value).test(x); + }; + + switch (key as keyof typeof meta) { + case '$before': + return !usedActions.find(testRegexString); + case '$after': + return !!usedActions.find(testRegexString); + default: + throw new Error('Undefined meta operator.'); + } + } else { + // Current key is a base condition (url, cookies, selectors) + return inclusive({ [key]: value }, context); + } + }, + ); + } + + /** + * Given a Playwright's page object and a "declarative" list of 
actions, this function + * calls all mentioned functions on the Page object.\ + * \ + * Manipulates the iterator indexes (experimental feature, likely to be removed in + * the following versions of waw-interpreter) + * @param page Playwright Page object + * @param steps Array of actions. + */ + private async carryOutSteps(page: Page, steps: What[]): Promise { + /** + * Defines overloaded (or added) methods/actions usable in the workflow. + * If a method overloads any existing method of the Page class, it accepts the same set + * of parameters *(but can override some!)*\ + * \ + * Also, following piece of code defines functions to be run in the browser's context. + * Beware of false linter errors - here, we know better! + */ + const wawActions: Record void> = { + screenshot: async (params: PageScreenshotOptions) => { + const screenshotBuffer = await page.screenshot({ + ...params, path: undefined, + }); + await this.options.binaryCallback(screenshotBuffer, 'image/png'); + }, + enqueueLinks: async (selector: string) => { + const links: string[] = await page.locator(selector) + .evaluateAll( + // @ts-ignore + (elements) => elements.map((a) => a.href).filter((x) => x), + ); + const context = page.context(); + + for (const link of links) { + // eslint-disable-next-line + this.concurrency.addJob(async () => { + try { + const newPage = await context.newPage(); + await newPage.goto(link); + await newPage.waitForLoadState('networkidle'); + await this.runLoop(newPage, this.initializedWorkflow!); + } catch (e) { + // `runLoop` uses soft mode, so it recovers from it's own exceptions + // but newPage(), goto() and waitForLoadState() don't (and will kill + // the interpreter by throwing). + this.log(e, Level.ERROR); + } + }); + } + await page.close(); + }, + scrape: async (selector?: string) => { + await this.ensureScriptsLoaded(page); + + const scrapeResults: Record[] = await page.evaluate((s) => window.scrape(s ?? 
null), selector); + await this.options.serializableCallback(scrapeResults); + }, + + scrapeSchema: async (schema: Record) => { + await this.ensureScriptsLoaded(page); + + const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); + await this.options.serializableCallback(scrapeResult); + }, + + scroll: async (pages?: number) => { + await page.evaluate(async (pagesInternal) => { + for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { + // @ts-ignore + window.scrollTo(0, window.scrollY + window.innerHeight); + } + }, pages ?? 1); + }, + script: async (code: string) => { + const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( + async () => { }, + ).constructor; + const x = new AsyncFunction('page', 'log', code); + await x(page, this.log); + }, + flag: async () => new Promise((res) => { + this.emit('flag', page, res); + }), + }; + + for (const step of steps) { + this.log(`Launching ${step.action}`, Level.LOG); + + if (step.action in wawActions) { + // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not) + const params = !step.args || Array.isArray(step.args) ? step.args : [step.args]; + await wawActions[step.action as CustomFunctions](...(params ?? [])); + } else { + // Implements the dot notation for the "method name" in the workflow + const levels = step.action.split('.'); + const methodName = levels[levels.length - 1]; + + let invokee: any = page; + for (const level of levels.splice(0, levels.length - 1)) { + invokee = invokee[level]; + } + + if (!step.args || Array.isArray(step.args)) { + await (invokee[methodName])(...(step.args ?? [])); + } else { + await (invokee[methodName])(step.args); + } + } + + await new Promise((res) => { setTimeout(res, 500); }); + } + } + + private async runLoop(p: Page, workflow: Workflow) { + const usedActions: string[] = []; + let lastAction = null; + let repeatCount = 0; + + /** + * Enables the interpreter functionality for popup windows. 
+ * User-requested concurrency should be entirely managed by the concurrency manager, + * e.g. via `enqueueLinks`. + */ + p.on('popup', (popup) => { + this.concurrency.addJob(() => this.runLoop(popup, workflow)); + }); + + /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ + while (true) { + // Checks whether the page was closed from outside, + // or the workflow execution has been stopped via `interpreter.stop()` + if (p.isClosed() || !this.stopper) { + return; + } + + try { + await p.waitForLoadState(); + } catch (e) { + await p.close(); + return; + } + + let pageState = {}; + try { + pageState = await this.getState(p, workflow); + } catch (e: any) { + this.log('The browser has been closed.'); + return; + } + + if (this.options.debug) { + this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN); + } + const actionId = workflow.findIndex( + (step) => this.applicable(step.where, pageState, usedActions), + ); + + const action = workflow[actionId]; + + this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG); + + if (action) { // action is matched + if (this.options.debugChannel?.activeId) { + this.options.debugChannel.activeId(actionId); + } + + repeatCount = action === lastAction ? repeatCount + 1 : 0; + if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) { + return; + } + lastAction = action; + + try { + await this.carryOutSteps(p, action.what); + usedActions.push(action.id ?? 'undefined'); + } catch (e) { + this.log(e, Level.ERROR); + } + } else { + return; + } + } + } + + private async ensureScriptsLoaded(page: Page) { + const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function'); + if (!isScriptLoaded) { + await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); + } + } + + /** + * Spawns a browser context and runs given workflow. + * \ + * Resolves after the playback is finished. 
+ * @param {Page} [page] Page to run the workflow on. + * @param {ParamType} params Workflow specific, set of parameters + * for the `{$param: nameofparam}` fields. + */ + public async run(page: Page, params?: ParamType): Promise { + if (this.stopper) { + throw new Error('This Interpreter is already running a workflow. To run another workflow, please, spawn another Interpreter.'); + } + /** + * `this.workflow` with the parameters initialized. + */ + this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params); + + await this.ensureScriptsLoaded(page); + + this.stopper = () => { + this.stopper = null; + }; + + this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow!)); + + await this.concurrency.waitForCompletion(); + + this.stopper = null; + } + + public async stop(): Promise { + if (this.stopper) { + await this.stopper(); + this.stopper = null; + } else { + throw new Error('Cannot stop, there is no running workflow!'); + } + } +} \ No newline at end of file diff --git a/maxun-core/src/preprocessor.ts b/maxun-core/src/preprocessor.ts new file mode 100644 index 00000000..9ad15c2a --- /dev/null +++ b/maxun-core/src/preprocessor.ts @@ -0,0 +1,179 @@ +import Joi from 'joi'; +import { + Workflow, WorkflowFile, ParamType, SelectorArray, Where, +} from './types/workflow'; +import { operators } from './types/logic'; + +/** +* Class for static processing the workflow files/objects. 
+*/ +export default class Preprocessor { + static validateWorkflow(workflow: WorkflowFile): any { + const regex = Joi.object({ + $regex: Joi.string().required(), + }); + + const whereSchema = Joi.object({ + url: [Joi.string().uri(), regex], + selectors: Joi.array().items(Joi.string()), + cookies: Joi.object({}).pattern(Joi.string(), Joi.string()), + $after: [Joi.string(), regex], + $before: [Joi.string(), regex], + $and: Joi.array().items(Joi.link('#whereSchema')), + $or: Joi.array().items(Joi.link('#whereSchema')), + $not: Joi.link('#whereSchema'), + }).id('whereSchema'); + + const schema = Joi.object({ + meta: Joi.object({ + name: Joi.string(), + desc: Joi.string(), + }), + workflow: Joi.array().items( + Joi.object({ + id: Joi.string(), + where: whereSchema.required(), + what: Joi.array().items({ + action: Joi.string().required(), + args: Joi.array().items(Joi.any()), + }).required(), + }), + ).required(), + }); + + const { error } = schema.validate(workflow); + + return error; + } + +/** +* Extracts parameter names from the workflow. +* @param {WorkflowFile} workflow The given workflow +* @returns {String[]} List of the distinct parameters' names (a parameter used several times is listed once). +*/ + static getParams(workflow: WorkflowFile): string[] { + const getParamsRecurse = (object: any): string[] => { + if (object && typeof object === 'object') { // `typeof null` is 'object' too, so null must be excluded explicitly + // Recursion base case + if (object.$param) { + return [object.$param]; + } + + // Recursion general case + return Object.values(object) + .reduce((p: string[], v: any): string[] => [...p, ...getParamsRecurse(v)], []); + } + return []; + }; + + return getParamsRecurse(workflow.workflow).filter((name, i, all) => all.indexOf(name) === i); + } + +/** +* List all the selectors used in the given workflow (only literal "selector" +* field in WHERE clauses so far) +*/ + // TODO : add recursive selector search (also in click/fill etc. events?) + static extractSelectors(workflow: Workflow): SelectorArray { + /** +* Given a Where condition, this function extracts +* all the existing selectors from it (recursively). 
+*/ + const selectorsFromCondition = (where: Where): SelectorArray => { + // the `selectors` field is either on the top level + let out = where.selectors ?? []; + if (!Array.isArray(out)) { + out = [out]; + } + + // or nested in the "operator" array + operators.forEach((op) => { + let condWhere = where[op]; + if (condWhere) { + condWhere = Array.isArray(condWhere) ? condWhere : [condWhere]; + (condWhere).forEach((subWhere) => { + out = [...out, ...selectorsFromCondition(subWhere)]; + }); + } + }); + + return out; + }; + + // Iterate through all the steps and extract the selectors from all of them. + return workflow.reduce((p: SelectorArray, step) => [ + ...p, + ...selectorsFromCondition(step.where).filter((x) => !p.includes(x)), + ], []); + } + +/** +* Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects +* with the defined value. +* @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). +*/ + static initWorkflow(workflow: Workflow, params?: ParamType): Workflow { + const paramNames = this.getParams({ workflow }); + + if (Object.keys(params ?? {}).sort().join(',') !== paramNames.sort().join(',')) { + throw new Error(`Provided parameters do not match the workflow parameters + provided: ${Object.keys(params ?? {}).sort().join(',')}, + expected: ${paramNames.sort().join(',')} + `); + } + /** + * A recursive method for initializing special `{key: value}` syntax objects in the workflow. + * @param object Workflow to initialize (or a part of it). + * @param k key to look for ($regex, $param) + * @param f function mutating the special `{}` syntax into + * its true representation (RegExp...) 
+ * @returns Updated object + */ + const initSpecialRecurse = ( + object: unknown, + k: string, + f: (value: string) => unknown, + ): unknown => { + if (!object || typeof object !== 'object') { + return object; + } + + const out = object; + // for every key (child) of the object + Object.keys(object!).forEach((key) => { + // if the field is a non-null value with only one key, which is `k` (Object.keys(null) would throw) + if ((object)[key] && Object.keys((object)[key]).length === 1 && (object)[key][k]) { + // process the current special tag (init param, hydrate regex...) + (out)[key] = f((object)[key][k]); + } else { + initSpecialRecurse((object)[key], k, f); + } + }); + return out; + }; + + // TODO: do better deep copy, this is hideous. + let workflowCopy = JSON.parse(JSON.stringify(workflow)); + + if (params) { + workflowCopy = initSpecialRecurse( + workflowCopy, + '$param', + (paramName) => { + if (params && Object.prototype.hasOwnProperty.call(params, paramName)) { // presence check: falsy values (0, '', false) are valid parameters + return params[paramName]; + } + throw new SyntaxError(`Unspecified parameter found ${paramName}.`); + }, + ); + } + + workflowCopy = initSpecialRecurse( + workflowCopy, + '$regex', + (regex) => new RegExp(regex), + ); + + return workflowCopy; + } +} \ No newline at end of file diff --git a/maxun-core/src/types/logic.ts b/maxun-core/src/types/logic.ts new file mode 100644 index 00000000..5d06abbe --- /dev/null +++ b/maxun-core/src/types/logic.ts @@ -0,0 +1,5 @@ +export const unaryOperators = ['$not'] as const; +export const naryOperators = ['$and', '$or'] as const; + +export const operators = [...unaryOperators, ...naryOperators] as const; +export const meta = ['$before', '$after'] as const; \ No newline at end of file diff --git a/maxun-core/src/types/workflow.ts b/maxun-core/src/types/workflow.ts new file mode 100644 index 00000000..36c6d14d --- /dev/null +++ b/maxun-core/src/types/workflow.ts @@ -0,0 +1,58 @@ +import { Page } from 'playwright'; +import { + naryOperators, unaryOperators, operators, meta, +} from './logic'; + +export type Operator = typeof operators[number]; +export type UnaryOperator = 
typeof unaryOperators[number]; +export type NAryOperator = typeof naryOperators[number]; + +export type Meta = typeof meta[number]; + +export type SelectorArray = string[]; + +type RegexableString = string | { '$regex': string }; + +type BaseConditions = { + 'url': RegexableString, + 'cookies': Record, + 'selectors': SelectorArray, // (CSS/Playwright) selectors use their own logic, there is no reason (and several technical difficulties) to allow regular expression notation +} & Record; + +export type Where = + Partial<{ [key in NAryOperator]: Where[] }> & // either a logic operator (arity N) + Partial<{ [key in UnaryOperator]: Where }> & // or an unary operator + Partial; // or one of the base conditions + +type MethodNames = { + [K in keyof T]: T[K] extends Function ? K : never; +}[keyof T]; + +export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag'; + +export type What = { + action: MethodNames | CustomFunctions, + args?: any[] +}; + +export type PageState = Partial; + +export type ParamType = Record; + +export type MetaData = { + name?: string, + desc?: string, +}; + +export interface WhereWhatPair { + id?: string + where: Where + what: What[] +} + +export type Workflow = WhereWhatPair[]; + +export type WorkflowFile = { + meta?: MetaData, + workflow: Workflow +}; \ No newline at end of file diff --git a/maxun-core/src/utils/concurrency.ts b/maxun-core/src/utils/concurrency.ts new file mode 100644 index 00000000..eec7eb33 --- /dev/null +++ b/maxun-core/src/utils/concurrency.ts @@ -0,0 +1,85 @@ +/** + * Concurrency class for running concurrent tasks while managing a limited amount of resources. + */ +export default class Concurrency { + /** + * Maximum number of workers running in parallel. If set to `null`, there is no limit. + */ + maxConcurrency: number = 1; + + /** + * Number of currently active workers. + */ + activeWorkers: number = 0; + + /** + * Queue of jobs waiting to be completed. 
+ */ + private jobQueue: Function[] = []; + + /** + * "Resolve" callbacks of the waitForCompletion() promises. + */ + private waiting: Function[] = []; + + /** + * Constructs a new instance of concurrency manager. + * @param {number} maxConcurrency Maximum number of workers running in parallel. + */ + constructor(maxConcurrency: number) { + this.maxConcurrency = maxConcurrency; + } + + /** + * Takes a waiting job out of the queue and runs it. A rejected job is reported and still frees its worker slot. + */ + private runNextJob(): void { + const job = this.jobQueue.pop(); + + if (job) { + // console.debug("Running a job..."); + job().catch((e: unknown) => { console.error(e); }).then(() => { + // Runs after success AND failure - otherwise a rejected job would block waitForCompletion() forever. + this.runNextJob(); + }); + } else { + // console.debug("No waiting job found!"); + this.activeWorkers -= 1; + if (this.activeWorkers === 0) { + // console.debug("This concurrency manager is idle!"); + this.waiting.forEach((x) => x()); + } + } + } + + /** + * Pass a job (a time-demanding async function) to the concurrency manager. \ + * The time of the job's execution depends on the concurrency manager itself + * (given a generous enough `maxConcurrency` value, it might be immediate, + * but this is not guaranteed). + * @param job Async function to be executed (job to be processed). + */ + addJob(job: () => Promise): void { + // console.debug("Adding a worker!"); + this.jobQueue.push(job); + + if (!this.maxConcurrency || this.activeWorkers < this.maxConcurrency) { + this.runNextJob(); + this.activeWorkers += 1; + } else { + // console.debug("No capacity to run a worker now, waiting!"); + } + } + + /** + * Waits until there is no running nor waiting job. \ + * If the concurrency manager is idle at the time of calling this function, + * it waits until at least one job is completed (can be "presubscribed"). + * @returns Promise, resolved after there is no running/waiting worker. 
+ */ + waitForCompletion(): Promise { + return new Promise((res) => { + this.waiting.push(res); + }); + } +} \ No newline at end of file diff --git a/maxun-core/src/utils/logger.ts b/maxun-core/src/utils/logger.ts new file mode 100644 index 00000000..e57421aa --- /dev/null +++ b/maxun-core/src/utils/logger.ts @@ -0,0 +1,30 @@ +/* +* Logger class for more detailed and comprehensible logs (with colors and timestamps) +*/ + +export enum Level { + DATE = 36, + LOG = 0, + WARN = 93, + ERROR = 31, + DEBUG = 95, + RESET = 0, + } + + export default function logger( + message: string | Error, + level: (Level.LOG | Level.WARN | Level.ERROR | Level.DEBUG) = Level.LOG, + ) { + let m = message; + if (message.constructor.name.includes('Error') && typeof message !== 'string') { + m = (message).message; + } + process.stdout.write(`\x1b[${Level.DATE}m[${(new Date()).toLocaleString()}]\x1b[0m `); + process.stdout.write(`\x1b[${level}m`); + if (level === Level.ERROR || level === Level.WARN) { + process.stderr.write(m); + } else { + process.stdout.write(m); + } + process.stdout.write(`\x1b[${Level.RESET}m\n`); + } \ No newline at end of file diff --git a/maxun-core/src/utils/utils.ts b/maxun-core/src/utils/utils.ts new file mode 100644 index 00000000..48883dcf --- /dev/null +++ b/maxun-core/src/utils/utils.ts @@ -0,0 +1,13 @@ +/** + * ESLint rule in case there is only one util function + * (it still does not represent the "utils" file) +*/ + +/* eslint-disable import/prefer-default-export */ + +/** + * Converts an array of scalars to an object with **items** of the array **for keys**. 
+ */ +export function arrayToObject(array : any[]) { + return array.reduce((p, x) => ({ ...p, [x]: [] }), {}); + } \ No newline at end of file diff --git a/maxun-core/tsconfig.json b/maxun-core/tsconfig.json new file mode 100644 index 00000000..4a1cf18b --- /dev/null +++ b/maxun-core/tsconfig.json @@ -0,0 +1,11 @@ +{ + "compilerOptions": { + "outDir": "./build", + "declaration": true, + "allowJs": true, + "target": "es5", + "module": "commonjs", + "esModuleInterop": true + }, + "include": ["src"] +} diff --git a/mx-interpreter/interpret.ts b/mx-interpreter/interpret.ts new file mode 100644 index 00000000..9ac32b0e --- /dev/null +++ b/mx-interpreter/interpret.ts @@ -0,0 +1,459 @@ +/* eslint-disable no-await-in-loop, no-restricted-syntax */ +import { Page, PageScreenshotOptions } from 'playwright'; +import path from 'path'; + +import { EventEmitter } from 'events'; +import { + Where, What, PageState, Workflow, WorkflowFile, + ParamType, SelectorArray, CustomFunctions, +} from './types/workflow'; + +import { operators, meta } from './types/logic'; +import { arrayToObject } from './utils/utils'; +import Concurrency from './utils/concurrency'; +import Preprocessor from './preprocessor'; +import log, { Level } from './utils/logger'; + +/** + * Defines optional interpreter options (passed in constructor) + */ +interface InterpreterOptions { + maxRepeats: number; + maxConcurrency: number; + serializableCallback: (output: any) => (void | Promise); + binaryCallback: (output: any, mimeType: string) => (void | Promise); + debug: boolean; + debugChannel: Partial<{ + activeId: Function, + debugMessage: Function, + }> +} + +/** + * Class for running the Smart Workflows. 
+ */ +export default class Interpreter extends EventEmitter { + private workflow: Workflow; + + private initializedWorkflow: Workflow | null; + + private options: InterpreterOptions; + + private concurrency: Concurrency; + + private stopper: Function | null = null; + + private log: typeof log; + + constructor(workflow: WorkflowFile, options?: Partial) { + super(); + this.workflow = workflow.workflow; + this.initializedWorkflow = null; + this.options = { + maxRepeats: 5, + maxConcurrency: 5, + serializableCallback: (data) => { log(JSON.stringify(data), Level.WARN); }, + binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); }, + debug: false, + debugChannel: {}, + ...options, + }; + this.concurrency = new Concurrency(this.options.maxConcurrency); + this.log = (...args) => log(...args); + + const error = Preprocessor.validateWorkflow(workflow); + if (error) { + throw (error); + } + + if (this.options.debugChannel?.debugMessage) { + const oldLog = this.log; + // @ts-ignore + this.log = (...args: Parameters) => { + if (args[1] !== Level.LOG) { + this.options.debugChannel.debugMessage!(typeof args[0] === 'string' ? args[0] : args[0].message); + } + oldLog(...args); + }; + } + } + + /** + * Returns the context object from given Page and the current workflow.\ + * \ + * `workflow` is used for selector extraction - function searches for used selectors to + * look for later in the page's context. + * @param page Playwright Page object + * @param workflow Current **initialized** workflow (array of where-what pairs). + * @returns {PageState} State of the current page. + */ + private async getState(page: Page, workflow: Workflow): Promise { + /** + * All the selectors present in the current Workflow + */ + const selectors = Preprocessor.extractSelectors(workflow); + + /** + * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability). 
+ * @param selector Selector to be queried + * @returns True if the targetted element is actionable, false otherwise. + */ + const actionable = async (selector: string): Promise => { + try { + const proms = [ + page.isEnabled(selector, { timeout: 500 }), + page.isVisible(selector, { timeout: 500 }), + ]; + + return await Promise.all(proms).then((bools) => bools.every((x) => x)); + } catch (e) { + // log(e, Level.ERROR); + return false; + } + }; + + /** + * Object of selectors present in the current page. + */ + const presentSelectors: SelectorArray = await Promise.all( + selectors.map(async (selector) => { + if (await actionable(selector)) { + return [selector]; + } + return []; + }), + ).then((x) => x.flat()); + + return { + url: page.url(), + cookies: (await page.context().cookies([page.url()])) + .reduce((p, cookie) => ( + { + ...p, + [cookie.name]: cookie.value, + }), {}), + selectors: presentSelectors, + }; + } + + /** + * Tests if the given action is applicable with the given context. + * @param where Tested *where* condition + * @param context Current browser context. + * @returns True if `where` is applicable in the given context, false otherwise + */ + private applicable(where: Where, context: PageState, usedActions: string[] = []): boolean { + /** + * Given two arbitrary objects, determines whether `subset` is a subset of `superset`.\ + * \ + * For every key in `subset`, there must be a corresponding key with equal scalar + * value in `superset`, or `inclusive(subset[key], superset[key])` must hold. + * @param subset Arbitrary non-cyclic JS object (where clause) + * @param superset Arbitrary non-cyclic JS object (browser context) + * @returns `true` if `subset <= superset`, `false` otherwise. + */ + const inclusive = (subset: Record, superset: Record) + : boolean => ( + Object.entries(subset).every( + ([key, value]) => { + /** + * Arrays are compared without order (are transformed into objects before comparison). 
+ */ + const parsedValue = Array.isArray(value) ? arrayToObject(value) : value; + + const parsedSuperset: Record = {}; + parsedSuperset[key] = Array.isArray(superset[key]) + ? arrayToObject(superset[key]) + : superset[key]; + + // Every `subset` key must exist in the `superset` and + // have the same value (strict equality), or subset[key] <= superset[key] + return parsedSuperset[key] + && ( + (parsedSuperset[key] === parsedValue) + || ((parsedValue).constructor.name === 'RegExp' && (parsedValue).test(parsedSuperset[key])) + || ( + (parsedValue).constructor.name !== 'RegExp' + && typeof parsedValue === 'object' && inclusive(parsedValue, parsedSuperset[key]) + ) + ); + }, + ) + ); + + // Every value in the "where" object should be compliant to the current state. + return Object.entries(where).every( + ([key, value]) => { + if (operators.includes(key)) { + const array = Array.isArray(value) + ? value as Where[] + : Object.entries(value).map((a) => Object.fromEntries([a])); + // every condition is treated as a single context + + switch (key as keyof typeof operators) { + case '$and': + return array?.every((x) => this.applicable(x, context)); + case '$or': + return array?.some((x) => this.applicable(x, context)); + case '$not': + return !this.applicable(value, context); // $not should be a unary operator + default: + throw new Error('Undefined logic operator.'); + } + } else if (meta.includes(key)) { + const testRegexString = (x: string) => { + if (typeof value === 'string') { + return x === value; + } + + return (value).test(x); + }; + + switch (key as keyof typeof meta) { + case '$before': + return !usedActions.find(testRegexString); + case '$after': + return !!usedActions.find(testRegexString); + default: + throw new Error('Undefined meta operator.'); + } + } else { + // Current key is a base condition (url, cookies, selectors) + return inclusive({ [key]: value }, context); + } + }, + ); + } + + /** + * Given a Playwright's page object and a "declarative" list of 
actions, this function + * calls all mentioned functions on the Page object.\ + * \ + * Manipulates the iterator indexes (experimental feature, likely to be removed in + * the following versions of waw-interpreter) + * @param page Playwright Page object + * @param steps Array of actions. + */ + private async carryOutSteps(page: Page, steps: What[]): Promise { + /** + * Defines overloaded (or added) methods/actions usable in the workflow. + * If a method overloads any existing method of the Page class, it accepts the same set + * of parameters *(but can override some!)*\ + * \ + * Also, following piece of code defines functions to be run in the browser's context. + * Beware of false linter errors - here, we know better! + */ + const wawActions: Record void> = { + screenshot: async (params: PageScreenshotOptions) => { + const screenshotBuffer = await page.screenshot({ + ...params, path: undefined, + }); + await this.options.binaryCallback(screenshotBuffer, 'image/png'); + }, + enqueueLinks: async (selector: string) => { + const links: string[] = await page.locator(selector) + .evaluateAll( + // @ts-ignore + (elements) => elements.map((a) => a.href).filter((x) => x), + ); + const context = page.context(); + + for (const link of links) { + // eslint-disable-next-line + this.concurrency.addJob(async () => { + try { + const newPage = await context.newPage(); + await newPage.goto(link); + await newPage.waitForLoadState('networkidle'); + await this.runLoop(newPage, this.initializedWorkflow!); + } catch (e) { + // `runLoop` uses soft mode, so it recovers from it's own exceptions + // but newPage(), goto() and waitForLoadState() don't (and will kill + // the interpreter by throwing). + this.log(e, Level.ERROR); + } + }); + } + await page.close(); + }, + scrape: async (selector?: string) => { + const scrapeResults: Record[] = await page + // eslint-disable-next-line + // @ts-ignore + .evaluate((s) => scrape(s ?? 
null), selector); + await this.options.serializableCallback(scrapeResults); + }, + scrapeSchema: async (schema: Record) => { + const handleLists = await Promise.all( + Object.values(schema).map((selector) => page.$$(selector)), + ); + + const namedHandleLists = Object.fromEntries( + Object.keys(schema).map((key, i) => [key, handleLists[i]]), + ); + + const scrapeResult = await page.evaluate((n) => scrapeSchema(n), namedHandleLists); + + this.options.serializableCallback(scrapeResult); + }, + scroll: async (pages?: number) => { + await page.evaluate(async (pagesInternal) => { + for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { + // @ts-ignore + window.scrollTo(0, window.scrollY + window.innerHeight); + } + }, pages ?? 1); + }, + script: async (code: string) => { + const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( + async () => { }, + ).constructor; + const x = new AsyncFunction('page', 'log', code); + await x(page, this.log); + }, + flag: async () => new Promise((res) => { + this.emit('flag', page, res); + }), + }; + + for (const step of steps) { + this.log(`Launching ${step.action}`, Level.LOG); + + if (step.action in wawActions) { + // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not) + const params = !step.args || Array.isArray(step.args) ? step.args : [step.args]; + await wawActions[step.action as CustomFunctions](...(params ?? [])); + } else { + // Implements the dot notation for the "method name" in the workflow + const levels = step.action.split('.'); + const methodName = levels[levels.length - 1]; + + let invokee: any = page; + for (const level of levels.splice(0, levels.length - 1)) { + invokee = invokee[level]; + } + + if (!step.args || Array.isArray(step.args)) { + await (invokee[methodName])(...(step.args ?? 
[])); + } else { + await (invokee[methodName])(step.args); + } + } + + await new Promise((res) => { setTimeout(res, 500); }); + } + } + + private async runLoop(p: Page, workflow: Workflow) { + const usedActions: string[] = []; + let lastAction = null; + let repeatCount = 0; + + /** + * Enables the interpreter functionality for popup windows. + * User-requested concurrency should be entirely managed by the concurrency manager, + * e.g. via `enqueueLinks`. + */ + p.on('popup', (popup) => { + this.concurrency.addJob(() => this.runLoop(popup, workflow)); + }); + + /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ + while (true) { + // Checks whether the page was closed from outside, + // or the workflow execution has been stopped via `interpreter.stop()` + if (p.isClosed() || !this.stopper) { + return; + } + + try { + await p.waitForLoadState(); + } catch (e) { + await p.close(); + return; + } + + let pageState = {}; + try { + pageState = await this.getState(p, workflow); + } catch (e: any) { + this.log('The browser has been closed.'); + return; + } + + if (this.options.debug) { + this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN); + } + const actionId = workflow.findIndex( + (step) => this.applicable(step.where, pageState, usedActions), + ); + + const action = workflow[actionId]; + + this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG); + + if (action) { // action is matched + if (this.options.debugChannel?.activeId) { + this.options.debugChannel.activeId(actionId); + } + + repeatCount = action === lastAction ? repeatCount + 1 : 0; + if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) { + return; + } + lastAction = action; + + try { + await this.carryOutSteps(p, action.what); + usedActions.push(action.id ?? 'undefined'); + } catch (e) { + this.log(e, Level.ERROR); + } + } else { + return; + } + } + } + + /** + * Spawns a browser context and runs given workflow. 
+ * \ + * Resolves after the playback is finished. + * @param {Page} [page] Page to run the workflow on. + * @param {ParamType} params Workflow specific, set of parameters + * for the `{$param: nameofparam}` fields. + */ + public async run(page: Page, params?: ParamType): Promise { + if (this.stopper) { + throw new Error('This Interpreter is already running a workflow. To run another workflow, please, spawn another Interpreter.'); + } + /** + * `this.workflow` with the parameters initialized. + */ + this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params); + + // @ts-ignore + if (await page.evaluate(() => !window.scrape)) { + page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); + } + + this.stopper = () => { + this.stopper = null; + }; + + this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow!)); + + await this.concurrency.waitForCompletion(); + + this.stopper = null; + } + + public async stop(): Promise { + if (this.stopper) { + await this.stopper(); + this.stopper = null; + } else { + throw new Error('Cannot stop, there is no running workflow!'); + } + } +} \ No newline at end of file diff --git a/mx-interpreter/preprocessor.ts b/mx-interpreter/preprocessor.ts new file mode 100644 index 00000000..9ad15c2a --- /dev/null +++ b/mx-interpreter/preprocessor.ts @@ -0,0 +1,179 @@ +import Joi from 'joi'; +import { + Workflow, WorkflowFile, ParamType, SelectorArray, Where, +} from './types/workflow'; +import { operators } from './types/logic'; + +/** +* Class for static processing the workflow files/objects. 
+*/ +export default class Preprocessor { + static validateWorkflow(workflow: WorkflowFile): any { + const regex = Joi.object({ + $regex: Joi.string().required(), + }); + + const whereSchema = Joi.object({ + url: [Joi.string().uri(), regex], + selectors: Joi.array().items(Joi.string()), + cookies: Joi.object({}).pattern(Joi.string(), Joi.string()), + $after: [Joi.string(), regex], + $before: [Joi.string(), regex], + $and: Joi.array().items(Joi.link('#whereSchema')), + $or: Joi.array().items(Joi.link('#whereSchema')), + $not: Joi.link('#whereSchema'), + }).id('whereSchema'); + + const schema = Joi.object({ + meta: Joi.object({ + name: Joi.string(), + desc: Joi.string(), + }), + workflow: Joi.array().items( + Joi.object({ + id: Joi.string(), + where: whereSchema.required(), + what: Joi.array().items({ + action: Joi.string().required(), + args: Joi.array().items(Joi.any()), + }).required(), + }), + ).required(), + }); + + const { error } = schema.validate(workflow); + + return error; + } + +/** +* Extracts parameter names from the workflow. +* @param {WorkflowFile} workflow The given workflow +* @returns {String[]} List of parameters' names. +*/ + static getParams(workflow: WorkflowFile): string[] { + const getParamsRecurse = (object: any): string[] => { + if (typeof object === 'object') { + // Recursion base case + if (object.$param) { + return [object.$param]; + } + + // Recursion general case + return Object.values(object) + .reduce((p: string[], v: any): string[] => [...p, ...getParamsRecurse(v)], []); + } + return []; + }; + + return getParamsRecurse(workflow.workflow); + } + +/** +* List all the selectors used in the given workflow (only literal "selector" +* field in WHERE clauses so far) +*/ + // TODO : add recursive selector search (also in click/fill etc. events?) + static extractSelectors(workflow: Workflow): SelectorArray { + /** +* Given a Where condition, this function extracts +* all the existing selectors from it (recursively). 
+*/ + const selectorsFromCondition = (where: Where): SelectorArray => { + // the `selectors` field is either on the top level + let out = where.selectors ?? []; + if (!Array.isArray(out)) { + out = [out]; + } + + // or nested in the "operator" array + operators.forEach((op) => { + let condWhere = where[op]; + if (condWhere) { + condWhere = Array.isArray(condWhere) ? condWhere : [condWhere]; + (condWhere).forEach((subWhere) => { + out = [...out, ...selectorsFromCondition(subWhere)]; + }); + } + }); + + return out; + }; + + // Iterate through all the steps and extract the selectors from all of them. + return workflow.reduce((p: SelectorArray, step) => [ + ...p, + ...selectorsFromCondition(step.where).filter((x) => !p.includes(x)), + ], []); + } + +/** +* Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects +* with the defined value. +* @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). +*/ + static initWorkflow(workflow: Workflow, params?: ParamType): Workflow { + const paramNames = this.getParams({ workflow }); + + if (Object.keys(params ?? {}).sort().join(',') !== paramNames.sort().join(',')) { + throw new Error(`Provided parameters do not match the workflow parameters + provided: ${Object.keys(params ?? {}).sort().join(',')}, + expected: ${paramNames.sort().join(',')} + `); + } + /** + * A recursive method for initializing special `{key: value}` syntax objects in the workflow. + * @param object Workflow to initialize (or a part of it). + * @param k key to look for ($regex, $param) + * @param f function mutating the special `{}` syntax into + * its true representation (RegExp...) 
+ * @returns Updated object + */ + const initSpecialRecurse = ( + object: unknown, + k: string, + f: (value: string) => unknown, + ): unknown => { + if (!object || typeof object !== 'object') { + return object; + } + + const out = object; + // for every key (child) of the object + Object.keys(object!).forEach((key) => { + // if the field has only one key, which is `k` + if (Object.keys((object)[key]).length === 1 && (object)[key][k]) { + // process the current special tag (init param, hydrate regex...) + (out)[key] = f((object)[key][k]); + } else { + initSpecialRecurse((object)[key], k, f); + } + }); + return out; + }; + + // TODO: do better deep copy, this is hideous. + let workflowCopy = JSON.parse(JSON.stringify(workflow)); + + if (params) { + workflowCopy = initSpecialRecurse( + workflowCopy, + '$param', + (paramName) => { + if (params && params[paramName]) { + return params[paramName]; + } + throw new SyntaxError(`Unspecified parameter found ${paramName}.`); + }, + ); + } + + workflowCopy = initSpecialRecurse( + workflowCopy, + '$regex', + (regex) => new RegExp(regex), + ); + + return workflowCopy; + } +} \ No newline at end of file diff --git a/server/src/browser-management/inputHandlers.ts b/server/src/browser-management/inputHandlers.ts index d6902b3f..2e43a4c5 100644 --- a/server/src/browser-management/inputHandlers.ts +++ b/server/src/browser-management/inputHandlers.ts @@ -271,6 +271,7 @@ const handleChangeUrl = async (generator: WorkflowGenerator, page: Page, url: st try { await page.goto(url); logger.log('debug', `Went to ${url}`); + console.log(`Went to ${url}`) } catch (e) { const { message } = e as Error; logger.log('error', message); diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts new file mode 100644 index 00000000..beed25bb --- /dev/null +++ b/server/src/routes/storage.ts @@ -0,0 +1,235 @@ +/** + * RESTful API endpoints handling the recording storage. 
+ */ + +import { Router } from 'express'; +import logger from "../logger"; +import { deleteFile, readFile, readFiles, saveFile } from "../workflow-management/storage"; +import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller"; +import { chromium } from "playwright"; +import { browserPool } from "../server"; +import fs from "fs"; +import { uuid } from "uuidv4"; + +export const router = Router(); + +/** + * Logs information about recordings API. + */ +router.all('/', (req, res, next) => { + logger.log('debug',`The recordings API was invoked: ${req.url}`) + next() // pass control to the next handler +}) + +/** + * GET endpoint for getting an array of all stored recordings. + */ +router.get('/recordings', async (req, res) => { + try { + const data = await readFiles('./../storage/recordings/'); + return res.send(data); + } catch (e) { + logger.log('info', 'Error while reading recordings'); + return res.send(null); + } +}); + +/** + * DELETE endpoint for deleting a recording from the storage. + */ +router.delete('/recordings/:fileName', async (req, res) => { + try { + await deleteFile(`./../storage/recordings/${req.params.fileName}.waw.json`); + return res.send(true); + } catch (e) { + const {message} = e as Error; + logger.log('info', `Error while deleting a recording with name: ${req.params.fileName}.waw.json`); + return res.send(false); + } +}); + +/** + * GET endpoint for getting an array of runs from the storage. + */ +router.get('/runs', async (req, res) => { + try { + const data = await readFiles('./../storage/runs/'); + return res.send(data); + } catch (e) { + logger.log('info', 'Error while reading runs'); + return res.send(null); + } +}); + +/** + * DELETE endpoint for deleting a run from the storage. 
+ */ +router.delete('/runs/:fileName', async (req, res) => { + try { + await deleteFile(`./../storage/runs/${req.params.fileName}.json`); + return res.send(true); + } catch (e) { + const {message} = e as Error; + logger.log('info', `Error while deleting a run with name: ${req.params.fileName}.json`); + return res.send(false); + } +}); + +/** + * PUT endpoint for starting a remote browser instance and saving run metadata to the storage. + * Making it ready for interpretation and returning a runId. + */ +router.put('/runs/:fileName', async (req, res) => { + try { + const id = createRemoteBrowserForRun({ + browser: chromium, + launchOptions: { headless: true } + }); + + const runId = uuid(); + + const run_meta = { + status: 'RUNNING', + name: req.params.fileName, + startedAt: new Date().toLocaleString(), + finishedAt: '', + duration: '', + task: req.body.params ? 'task' : '', + browserId: id, + interpreterSettings: req.body, + log: '', + runId, + }; + fs.mkdirSync('../storage/runs', { recursive: true }) + await saveFile( + `../storage/runs/${req.params.fileName}_${runId}.json`, + JSON.stringify({ ...run_meta }, null, 2) + ); + logger.log('debug', `Created run with name: ${req.params.fileName}.json`); + return res.send({ + browserId: id, + runId: runId, + }); + } catch (e) { + const {message} = e as Error; + logger.log('info', `Error while creating a run with name: ${req.params.fileName}.json`); + return res.send(''); + } +}); + +/** + * GET endpoint for getting a run from the storage. 
+ */ +router.get('/runs/run/:fileName/:runId', async (req, res) => { + try { + // read the run from storage + const run = await readFile(`./../storage/runs/${req.params.fileName}_${req.params.runId}.json`) + const parsedRun = JSON.parse(run); + return res.send(parsedRun); + } catch (e) { + const { message } = e as Error; + logger.log('error', `Error ${message} while reading a run with name: ${req.params.fileName}_${req.params.runId}.json`); + return res.send(null); + } +}); + +/** + * POST endpoint for finishing a run and saving it to the storage. + */ +router.post('/runs/run/:fileName/:runId', async (req, res) => { + try { + // read the recording from storage + const recording = await readFile(`./../storage/recordings/${req.params.fileName}.waw.json`) + const parsedRecording = JSON.parse(recording); + // read the run from storage + const run = await readFile(`./../storage/runs/${req.params.fileName}_${req.params.runId}.json`) + const parsedRun = JSON.parse(run); + + // interpret the run in active browser + const browser = browserPool.getRemoteBrowser(parsedRun.browserId); + const currentPage = browser?.getCurrentPage(); + if (browser && currentPage) { + const interpretationInfo = await browser.interpreter.InterpretRecording( + parsedRecording.recording, currentPage, parsedRun.interpreterSettings); + const duration = Math.round((new Date().getTime() - new Date(parsedRun.startedAt).getTime()) / 1000); + const durString = (() => { + if (duration < 60) { + return `${duration} s`; + } + else { + const minAndS = (duration / 60).toString().split('.'); + return `${minAndS[0]} m ${minAndS[1]} s`; + } + })(); + await destroyRemoteBrowser(parsedRun.browserId); + const run_meta = { + ...parsedRun, + status: interpretationInfo.result, + finishedAt: new Date().toLocaleString(), + duration: durString, + browserId: null, + log: interpretationInfo.log.join('\n'), + serializableOutput: interpretationInfo.serializableOutput, + binaryOutput: interpretationInfo.binaryOutput, + }; + 
fs.mkdirSync('../storage/runs', { recursive: true }) + await saveFile( + `../storage/runs/${parsedRun.name}_${req.params.runId}.json`, + JSON.stringify(run_meta, null, 2) + ); + return res.send(true); + } else { + throw new Error('Could not destroy browser'); + } + } catch (e) { + const {message} = e as Error; + logger.log('info', `Error while running a recording with name: ${req.params.fileName}_${req.params.runId}.json`); + return res.send(false); + } +}); + +/** + * POST endpoint for aborting a current interpretation of the run. + */ +router.post('/runs/abort/:fileName/:runId', async (req, res) => { + try { + // read the run from storage + const run = await readFile(`./../storage/runs/${req.params.fileName}_${req.params.runId}.json`) + const parsedRun = JSON.parse(run); + + //get current log + const browser = browserPool.getRemoteBrowser(parsedRun.browserId); + const currentLog = browser?.interpreter.debugMessages.join('/n'); + const serializableOutput = browser?.interpreter.serializableData.reduce((reducedObject, item, index) => { + return { + [`item-${index}`]: item, + ...reducedObject, + } + }, {}); + const binaryOutput = browser?.interpreter.binaryData.reduce((reducedObject, item, index) => { + return { + [`item-${index}`]: item, + ...reducedObject, + } + }, {}); + const run_meta = { + ...parsedRun, + status: 'ABORTED', + finishedAt: null, + duration: '', + browserId: null, + log: currentLog, + }; + + fs.mkdirSync('../storage/runs', { recursive: true }) + await saveFile( + `../storage/runs/${parsedRun.name}_${req.params.runId}.json`, + JSON.stringify({ ...run_meta, serializableOutput, binaryOutput }, null, 2) + ); + return res.send(true); + } catch (e) { + const {message} = e as Error; + logger.log('info', `Error while running a recording with name: ${req.params.fileName}_${req.params.runId}.json`); + return res.send(false); + } +}); diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 
234d7014..ae2d2016 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -1,5 +1,5 @@ import { Action, ActionType, Coordinates, TagName } from "../../types"; -import { WhereWhatPair, WorkflowFile } from '@wbr-project/wbr-interpret'; +import { WhereWhatPair, WorkflowFile } from 'maxun-core'; import logger from "../../logger"; import { Socket } from "socket.io"; import { Page } from "playwright"; @@ -484,9 +484,9 @@ export class WorkflowGenerator { public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => { const rect = await getRect(page, coordinates); const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); - //console.log('Backend Rectangle:', rect) + const elementInfo = await getElementInformation(page, coordinates); if (rect) { - this.socket.emit('highlighter', { rect, selector: displaySelector }); + this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); } } diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index ff3d8c08..fa5e9332 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -1,4 +1,4 @@ -import Interpreter, { WorkflowFile } from "@wbr-project/wbr-interpret"; +import Interpreter, { WorkflowFile } from "maxun-core"; import logger from "../../logger"; import { Socket } from "socket.io"; import { Page } from "playwright"; @@ -8,7 +8,7 @@ import { InterpreterSettings } from "../../types"; * This class implements the main interpretation functions. * It holds some information about the current interpretation process and * registers to some events to allow the client (frontend) to interact with the interpreter. 
- * It uses the [@wbr-project/wbr-interpret](https://www.npmjs.com/package/@wbr-project/wbr-interpret) + * It uses the [maxun-core](https://www.npmjs.com/package/maxun-core) * library to interpret the workflow. * @category WorkflowManagement */ @@ -26,7 +26,7 @@ export class WorkflowInterpreter { /** * The instance of the {@link Interpreter} class used to interpret the workflow. - * From @wbr-project/wbr-interpret. + * From maxun-core. * @private */ private interpreter: Interpreter | null = null; diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 0290ee9f..b94ed2d1 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -1,6 +1,6 @@ import { Page } from "playwright"; import { Action, ActionType, Coordinates, TagName } from "../types"; -import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair, WorkflowFile } from "maxun-core"; import logger from "../logger"; import { getBestSelectorForAction } from "./utils"; @@ -111,7 +111,7 @@ export const getElementInformation = async ( console.log(`Element innerText: ${elementInfo.innerText}`); } } - + return elementInfo; } catch (error) { const { message, stack } = error as Error; diff --git a/src/api/storage.ts b/src/api/storage.ts new file mode 100644 index 00000000..93e99212 --- /dev/null +++ b/src/api/storage.ts @@ -0,0 +1,120 @@ +import { default as axios } from "axios"; +import { WorkflowFile } from "maxun-core"; +import { RunSettings } from "../components/molecules/RunSettings"; +import { CreateRunResponse } from "../pages/MainPage"; + +export const getStoredRecordings = async (): Promise => { + try { + const response = await axios.get('http://localhost:8080/storage/recordings'); + if (response.status === 200) { + return response.data; + } else { + throw new Error('Couldn\'t retrieve stored recordings'); + } + } catch(error: any) { + console.log(error); + return null; + } 
+}; + +export const getStoredRuns = async (): Promise => { + try { + const response = await axios.get('http://localhost:8080/storage/runs'); + if (response.status === 200) { + return response.data; + } else { + throw new Error('Couldn\'t retrieve stored recordings'); + } + } catch(error: any) { + console.log(error); + return null; + } +}; + +export const deleteRecordingFromStorage = async (fileName: string): Promise => { + try { + const response = await axios.delete(`http://localhost:8080/storage/recordings/${fileName}`); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't delete stored recording ${fileName}`); + } + } catch(error: any) { + console.log(error); + return false; + } +}; + +export const deleteRunFromStorage = async (fileName: string): Promise => { + try { + const response = await axios.delete(`http://localhost:8080/storage/runs/${fileName}`); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't delete stored recording ${fileName}`); + } + } catch(error: any) { + console.log(error); + return false; + } +}; + +export const editRecordingFromStorage = async (browserId: string, fileName: string): Promise => { + try { + const response = await axios.put(`http://localhost:8080/workflow/${browserId}/${fileName}`); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't edit stored recording ${fileName}`); + } + } catch(error: any) { + console.log(error); + return null; + } +}; + +export const createRunForStoredRecording = async (fileName: string, settings: RunSettings): Promise => { + try { + const response = await axios.put( + `http://localhost:8080/storage/runs/${fileName}`, + {...settings}); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't create a run for a recording ${fileName}`); + } + } catch(error: any) { + console.log(error); + return {browserId: '', runId: ''}; + } +} + +export 
const interpretStoredRecording = async (fileName: string, runId: string): Promise => { + try { + const response = await axios.post(`http://localhost:8080/storage/runs/run/${fileName}/${runId}`); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't run a recording ${fileName}`); + } + } catch(error: any) { + console.log(error); + return false; + } +} + +export const notifyAboutAbort = async (fileName: string, runId:string): Promise => { + try { + const response = await axios.post(`http://localhost:8080/storage/runs/abort/${fileName}/${runId}`); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't abort a running recording ${fileName} with id ${runId}`); + } + } catch(error: any) { + console.log(error); + return false; + } +} + + diff --git a/src/api/workflow.ts b/src/api/workflow.ts index 212737cb..dd76d9ac 100644 --- a/src/api/workflow.ts +++ b/src/api/workflow.ts @@ -1,4 +1,4 @@ -import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair, WorkflowFile } from "maxun-core"; import { emptyWorkflow } from "../shared/constants"; const axios = require('axios').default; diff --git a/src/components/atoms/ConfirmationBox.tsx b/src/components/atoms/ConfirmationBox.tsx new file mode 100644 index 00000000..b3eb10c2 --- /dev/null +++ b/src/components/atoms/ConfirmationBox.tsx @@ -0,0 +1,29 @@ +import React from 'react'; +import { Box, Button, IconButton, Stack, Typography } from "@mui/material"; + +interface ConfirmationBoxProps { + selector: string; + onYes: () => void; + onNo: () => void; +} + +export const ConfirmationBox = ({ selector, onYes, onNo }: ConfirmationBoxProps) => { + return ( + + + Confirmation + + + Do you want to interact with the element: {selector}? 
+ + + + + + + ); +}; \ No newline at end of file diff --git a/src/components/atoms/GenericModal.tsx b/src/components/atoms/GenericModal.tsx index 30ba7a68..63aa6900 100644 --- a/src/components/atoms/GenericModal.tsx +++ b/src/components/atoms/GenericModal.tsx @@ -41,4 +41,5 @@ const defaultModalStyle = { display: 'block', overflow: 'scroll', padding: '5px 25px 10px 25px', + zIndex: 3147483647, }; diff --git a/src/components/atoms/Highlighter.tsx b/src/components/atoms/Highlighter.tsx index 4949277d..34fb8f47 100644 --- a/src/components/atoms/Highlighter.tsx +++ b/src/components/atoms/Highlighter.tsx @@ -24,9 +24,9 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei }; - console.log('unmodifiedRect:', unmodifiedRect) - console.log('rectangle:', rect) - console.log('canvas rectangle:', canvasRect) + //console.log('unmodifiedRect:', unmodifiedRect) + //console.log('rectangle:', rect) + //console.log('canvas rectangle:', canvasRect) return (
diff --git a/src/components/atoms/PairDisplayDiv.tsx b/src/components/atoms/PairDisplayDiv.tsx index 3611e135..c7c4447c 100644 --- a/src/components/atoms/PairDisplayDiv.tsx +++ b/src/components/atoms/PairDisplayDiv.tsx @@ -1,6 +1,6 @@ import React, { FC } from 'react'; import Typography from '@mui/material/Typography'; -import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair } from "maxun-core"; import styled from "styled-components"; interface PairDisplayDivProps { diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index 9a879106..f6e6fb1c 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -2,6 +2,7 @@ import React, { useCallback, useEffect, useRef } from 'react'; import { useSocketStore } from '../../context/socket'; import { getMappedCoordinates } from "../../helpers/inputHelpers"; import { useGlobalInfoStore } from "../../context/globalInfo"; +import { useActionContext } from '../../context/browserActions'; interface CreateRefCallback { (ref: React.RefObject): void; @@ -26,6 +27,8 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { const canvasRef = useRef(null); const { socket } = useSocketStore(); const { setLastAction, lastAction } = useGlobalInfoStore(); + const { getText, getScreenshot } = useActionContext(); + const getTextRef = useRef(getText); const notifyLastAction = (action: string) => { if (lastAction !== action) { @@ -34,7 +37,10 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { }; const lastMousePosition = useRef({ x: 0, y: 0 }); - //const lastWheelPosition = useRef({ deltaX: 0, deltaY: 0 }); + + useEffect(() => { + getTextRef.current = getText; + }, [getText]); const onMouseEvent = useCallback((event: MouseEvent) => { if (socket) { @@ -45,7 +51,11 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { switch (event.type) { case 'mousedown': const clickCoordinates = getMappedCoordinates(event, 
canvasRef.current, width, height); - socket.emit('input:mousedown', clickCoordinates); + if (getTextRef.current === true) { + console.log('get text') + } else { + socket.emit('input:mousedown', clickCoordinates); + } notifyLastAction('click'); break; case 'mousemove': diff --git a/src/components/molecules/ActionSettings.tsx b/src/components/molecules/ActionSettings.tsx index 816c3e76..dcf2ba5c 100644 --- a/src/components/molecules/ActionSettings.tsx +++ b/src/components/molecules/ActionSettings.tsx @@ -20,16 +20,10 @@ export const ActionSettings = ({ action }: ActionSettingsProps) => { return ; case 'scroll': return ; - case 'scrape': - return ; + case 'scrape': + return ; case 'scrapeSchema': return ; - case 'script': - return ; - case 'enqueueLinks': - return ; - case 'mouse.click': - return ; default: return null; } diff --git a/src/components/molecules/AddWhatCondModal.tsx b/src/components/molecules/AddWhatCondModal.tsx index 714c0fef..5fadfaa6 100644 --- a/src/components/molecules/AddWhatCondModal.tsx +++ b/src/components/molecules/AddWhatCondModal.tsx @@ -1,4 +1,4 @@ -import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair } from "maxun-core"; import { GenericModal } from "../atoms/GenericModal"; import { modalStyle } from "./AddWhereCondModal"; import { Button, MenuItem, TextField, Typography } from "@mui/material"; diff --git a/src/components/molecules/AddWhereCondModal.tsx b/src/components/molecules/AddWhereCondModal.tsx index 182a8dd6..e5c34015 100644 --- a/src/components/molecules/AddWhereCondModal.tsx +++ b/src/components/molecules/AddWhereCondModal.tsx @@ -6,7 +6,7 @@ import { } from "@mui/material"; import React, { useRef } from "react"; import { GenericModal } from "../atoms/GenericModal"; -import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair } from "maxun-core"; import { SelectChangeEvent } from "@mui/material/Select/Select"; import { DisplayConditionSettings } from 
"./DisplayWhereConditionSettings"; import { useSocketStore } from "../../context/socket"; diff --git a/src/components/molecules/InterpretationButtons.tsx b/src/components/molecules/InterpretationButtons.tsx index bedb85af..7e0633af 100644 --- a/src/components/molecules/InterpretationButtons.tsx +++ b/src/components/molecules/InterpretationButtons.tsx @@ -5,7 +5,7 @@ import { interpretCurrentRecording, stopCurrentInterpretation } from "../../api/ import { useSocketStore } from "../../context/socket"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { GenericModal } from "../atoms/GenericModal"; -import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair } from "maxun-core"; import HelpIcon from '@mui/icons-material/Help'; interface InterpretationButtonsProps { diff --git a/src/components/molecules/LeftSidePanelContent.tsx b/src/components/molecules/LeftSidePanelContent.tsx index 5c19996e..79750d8f 100644 --- a/src/components/molecules/LeftSidePanelContent.tsx +++ b/src/components/molecules/LeftSidePanelContent.tsx @@ -1,7 +1,7 @@ import React, { useCallback, useEffect, useState } from 'react'; import Box from "@mui/material/Box"; import { Pair } from "./Pair"; -import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair, WorkflowFile } from "maxun-core"; import { useSocketStore } from "../../context/socket"; import { Add } from "@mui/icons-material"; import { Socket } from "socket.io-client"; diff --git a/src/components/molecules/Pair.tsx b/src/components/molecules/Pair.tsx index 7f501719..b05b912d 100644 --- a/src/components/molecules/Pair.tsx +++ b/src/components/molecules/Pair.tsx @@ -1,7 +1,7 @@ import React, { FC, useState } from 'react'; import { Stack, Button, IconButton, Tooltip, Chip, Badge } from "@mui/material"; import { AddPair, deletePair, UpdatePair } from "../../api/workflow"; -import { WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WorkflowFile } from 
"maxun-core"; import { ClearButton } from "../atoms/buttons/ClearButton"; import { GenericModal } from "../atoms/GenericModal"; import { PairEditForm } from "./PairEditForm"; diff --git a/src/components/molecules/PairDetail.tsx b/src/components/molecules/PairDetail.tsx index 9d37bb6d..aea6191b 100644 --- a/src/components/molecules/PairDetail.tsx +++ b/src/components/molecules/PairDetail.tsx @@ -1,5 +1,5 @@ import React, { useLayoutEffect, useRef, useState } from 'react'; -import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair } from "maxun-core"; import { Box, Button, IconButton, MenuItem, Stack, TextField, Tooltip, Typography } from "@mui/material"; import { Close, KeyboardArrowDown, KeyboardArrowUp } from "@mui/icons-material"; import TreeView from '@mui/lab/TreeView'; diff --git a/src/components/molecules/PairEditForm.tsx b/src/components/molecules/PairEditForm.tsx index 815acfdd..7ab9c381 100644 --- a/src/components/molecules/PairEditForm.tsx +++ b/src/components/molecules/PairEditForm.tsx @@ -1,6 +1,6 @@ import { Button, TextField, Typography } from "@mui/material"; import React, { FC } from "react"; -import { Preprocessor, WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { Preprocessor, WhereWhatPair } from "maxun-core"; interface PairProps { index: string; diff --git a/src/components/molecules/RecordingsTable.tsx b/src/components/molecules/RecordingsTable.tsx index df389a41..ee510761 100644 --- a/src/components/molecules/RecordingsTable.tsx +++ b/src/components/molecules/RecordingsTable.tsx @@ -8,7 +8,7 @@ import TableHead from '@mui/material/TableHead'; import TablePagination from '@mui/material/TablePagination'; import TableRow from '@mui/material/TableRow'; import { useEffect } from "react"; -import { WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WorkflowFile } from "maxun-core"; import { IconButton } from "@mui/material"; import { Assignment, DeleteForever, Edit, PlayCircle } from 
"@mui/icons-material"; import { useGlobalInfoStore } from "../../context/globalInfo"; diff --git a/src/components/molecules/SidePanelHeader.tsx b/src/components/molecules/SidePanelHeader.tsx index 7743230e..54ae3060 100644 --- a/src/components/molecules/SidePanelHeader.tsx +++ b/src/components/molecules/SidePanelHeader.tsx @@ -3,7 +3,7 @@ import { InterpretationButtons } from "./InterpretationButtons"; import { AddButton } from "../atoms/buttons/AddButton"; import { GenericModal } from "../atoms/GenericModal"; import { PairEditForm } from "./PairEditForm"; -import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair, WorkflowFile } from "maxun-core"; import { AddPair } from "../../api/workflow"; import { Button, Stack } from "@mui/material"; import { FastForward } from "@mui/icons-material"; diff --git a/src/components/molecules/action-settings/clickOnCoordinates.tsx b/src/components/molecules/action-settings/clickOnCoordinates.tsx deleted file mode 100644 index c68a53b0..00000000 --- a/src/components/molecules/action-settings/clickOnCoordinates.tsx +++ /dev/null @@ -1,39 +0,0 @@ -import React, { forwardRef, useImperativeHandle } from 'react'; -import { Stack, TextField } from "@mui/material"; -import { WarningText } from '../../atoms/texts'; -import InfoIcon from "@mui/icons-material/Info"; - -export const ClickOnCoordinatesSettings = forwardRef((props, ref) => { - const [settings, setSettings] = React.useState([0, 0]); - useImperativeHandle(ref, () => ({ - getSettings() { - return settings; - } - })); - - return ( - - setSettings(prevState => ([Number(e.target.value), prevState[1]]))} - required - defaultValue={settings[0]} - /> - setSettings(prevState => ([prevState[0], Number(e.target.value)]))} - required - defaultValue={settings[1]} - /> - - - The click function will click on the given coordinates. - You need to put the coordinates by yourself. 
- - - ); -}); diff --git a/src/components/molecules/action-settings/enqueueLinks.tsx b/src/components/molecules/action-settings/enqueueLinks.tsx deleted file mode 100644 index 2c383d47..00000000 --- a/src/components/molecules/action-settings/enqueueLinks.tsx +++ /dev/null @@ -1,32 +0,0 @@ -import React, { forwardRef, useImperativeHandle } from 'react'; -import { Stack, TextField } from "@mui/material"; -import { WarningText } from "../../atoms/texts"; -import WarningIcon from "@mui/icons-material/Warning"; -import InfoIcon from "@mui/icons-material/Info"; - -export const EnqueueLinksSettings = forwardRef((props, ref) => { - const [settings, setSettings] = React.useState(''); - useImperativeHandle(ref, () => ({ - getSettings() { - return settings; - } - })); - - return ( - - setSettings(e.target.value)} - /> - - - Reads elements targeted by the selector and stores their links in a queue. - Those pages are then processed using the same workflow as the initial page - (in parallel if the maxConcurrency parameter is greater than 1). 
- - - ); -}); diff --git a/src/components/molecules/action-settings/index.ts b/src/components/molecules/action-settings/index.ts index 32906db7..58e3f3c4 100644 --- a/src/components/molecules/action-settings/index.ts +++ b/src/components/molecules/action-settings/index.ts @@ -2,16 +2,10 @@ import { ScrollSettings } from './scroll'; import { ScreenshotSettings } from "./screenshot"; import { ScrapeSettings } from "./scrape"; import { ScrapeSchemaSettings } from "./scrapeSchema"; -import { ScriptSettings } from "./script"; -import { EnqueueLinksSettings } from "./enqueueLinks"; -import { ClickOnCoordinatesSettings } from "./clickOnCoordinates"; export { ScrollSettings, ScreenshotSettings, ScrapeSettings, ScrapeSchemaSettings, - ScriptSettings, - EnqueueLinksSettings, - ClickOnCoordinatesSettings, }; diff --git a/src/components/molecules/action-settings/script.tsx b/src/components/molecules/action-settings/script.tsx deleted file mode 100644 index 2cece263..00000000 --- a/src/components/molecules/action-settings/script.tsx +++ /dev/null @@ -1,63 +0,0 @@ -import React, { forwardRef, useImperativeHandle } from 'react'; -import Editor from 'react-simple-code-editor'; -// @ts-ignore -import { highlight, languages } from 'prismjs/components/prism-core'; -import 'prismjs/components/prism-clike'; -import 'prismjs/components/prism-javascript'; -import 'prismjs/themes/prism.css'; -import styled from "styled-components"; -import InfoIcon from '@mui/icons-material/Info'; -import { WarningText } from "../../atoms/texts"; - -export const ScriptSettings = forwardRef((props, ref) => { - const [code, setCode] = React.useState(''); - - useImperativeHandle(ref, () => ({ - getSettings() { - return code; - } - })); - - return ( - - - - Allows to run an arbitrary asynchronous function evaluated at the server - side accepting the current page instance argument. 
- - setCode(code)} - highlight={code => highlight(code, languages.js)} - padding={10} - style={{ - fontFamily: '"Fira code", "Fira Mono", monospace', - fontSize: 12, - background: '#f0f0f0', - }} - /> - - ); -}); - -const EditorWrapper = styled.div` - flex: 1; - overflow: auto; - /** hard-coded height */ - height: 100%; - width: 100%; -`; - -const StyledEditor = styled(Editor)` - white-space: pre; - caret-color: #fff; - min-width: 100%; - min-height: 100%; - float: left; - & > textarea, - & > pre { - outline: none; - white-space: pre !important; - } -`; diff --git a/src/components/organisms/BrowserContent.tsx b/src/components/organisms/BrowserContent.tsx index 189af6ee..3b489968 100644 --- a/src/components/organisms/BrowserContent.tsx +++ b/src/components/organisms/BrowserContent.tsx @@ -133,4 +133,4 @@ export const BrowserContent = () => { const BrowserContentWrapper = styled.div` grid-area: browser; -`; +`; \ No newline at end of file diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index d81ff967..fd1589da 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -3,17 +3,52 @@ import { useSocketStore } from '../../context/socket'; import Canvas from "../atoms/canvas"; import { useBrowserDimensionsStore } from "../../context/browserDimensions"; import { Highlighter } from "../atoms/Highlighter"; +import { GenericModal } from '../atoms/GenericModal'; +import { useActionContext } from '../../context/browserActions'; +import { useBrowserSteps } from '../../context/browserSteps'; + +interface ElementInfo { + tagName: string; + hasOnlyText?: boolean; + innerText?: string; + url?: string; + imageUrl?: string; +} + +interface AttributeOption { + label: string; + value: string; +} + +const getAttributeOptions = (tagName: string): AttributeOption[] => { + switch (tagName.toLowerCase()) { + case 'a': + return [ + { label: 'Text', value: 'innerText' }, + { label: 'URL', 
value: 'href' } + ]; + case 'img': + return [ + { label: 'Alt Text', value: 'alt' }, + { label: 'Source URL', value: 'src' } + ]; + default: + return [{ label: 'Text', value: 'innerText' }]; + } +}; export const BrowserWindow = () => { - const [canvasRef, setCanvasReference] = useState | undefined>(undefined); const [screenShot, setScreenShot] = useState(""); - const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string } | null>(null); + const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string, elementInfo: ElementInfo | null; } | null>(null); + const [showAttributeModal, setShowAttributeModal] = useState(false); + const [attributeOptions, setAttributeOptions] = useState([]); + const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null); const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); - - console.log('Use browser dimensions:', width, height) + const { getText } = useActionContext(); + const { addTextStep } = useBrowserSteps(); const onMouseMove = (e: MouseEvent) => { if (canvasRef && canvasRef.current && highlighterData) { @@ -46,13 +81,10 @@ export const BrowserWindow = () => { return () => { socket?.off("screencast", screencastHandler); } - }, [screenShot, canvasRef, socket, screencastHandler]); - - const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string }) => { + const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => { setHighlighterData(data); - console.log('Highlighter Rect via socket:', data.rect) }, [highlighterData]) useEffect(() => { @@ -60,16 +92,94 @@ export const BrowserWindow = () => { if (socket) { socket.on("highlighter", highlighterHandler); } - //cleaning function return () => { document.removeEventListener('mousemove', onMouseMove); socket?.off("highlighter", highlighterHandler); }; }, [socket, 
onMouseMove]); + const handleClick = (e: React.MouseEvent) => { + if (highlighterData && canvasRef?.current) { + const canvasRect = canvasRef.current.getBoundingClientRect(); + const clickX = e.clientX - canvasRect.left; + const clickY = e.clientY - canvasRect.top; + + const highlightRect = highlighterData.rect; + if ( + clickX >= highlightRect.left && + clickX <= highlightRect.right && + clickY >= highlightRect.top && + clickY <= highlightRect.bottom + ) { + if (getText === true) { + const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); + if (options.length > 1) { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo + }); + setShowAttributeModal(true); + } else { + addTextStep('', highlighterData.elementInfo?.innerText || '', { + selector: highlighterData.selector, + tag: highlighterData.elementInfo?.tagName, + attribute: 'innerText' + }); + } + } + } + } + }; + + const handleAttributeSelection = (attribute: string) => { + if (selectedElement) { + let data = ''; + switch (attribute) { + case 'href': + data = selectedElement.info?.url || ''; + break; + case 'src': + data = selectedElement.info?.imageUrl || ''; + break; + default: + data = selectedElement.info?.innerText || ''; + } + { + if (getText === true) { + addTextStep('', data, { + selector: selectedElement.selector, + tag: selectedElement.info?.tagName, + attribute: attribute + }); + } + } + } + setShowAttributeModal(false); + }; + return ( - <> - {(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? +
+ { + getText === true ? ( + { }} + canBeClosed={false} + > +
+

Select Attribute

+ {attributeOptions.map((option) => ( + + ))} +
+ +
+ ) : null + } + {(getText === true && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? { width={width} height={height} /> - +
); }; @@ -97,8 +207,6 @@ const drawImage = (image: string, canvas: HTMLCanvasElement): void => { img.onload = () => { URL.revokeObjectURL(img.src); ctx?.drawImage(img, 0, 0, 1280, 720); - console.log('Image drawn on canvas:', img.width, img.height); - console.log('Image drawn on canvas:', canvas.width, canvas.height); }; }; \ No newline at end of file diff --git a/src/components/organisms/LeftSidePanel.tsx b/src/components/organisms/LeftSidePanel.tsx index 3319e536..164dc38c 100644 --- a/src/components/organisms/LeftSidePanel.tsx +++ b/src/components/organisms/LeftSidePanel.tsx @@ -2,7 +2,7 @@ import { Box, Paper, Tab, Tabs } from "@mui/material"; import React, { useCallback, useEffect, useState } from "react"; import { getActiveWorkflow, getParamsOfActiveWorkflow } from "../../api/workflow"; import { useSocketStore } from '../../context/socket'; -import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair, WorkflowFile } from "maxun-core"; import { SidePanelHeader } from "../molecules/SidePanelHeader"; import { emptyWorkflow } from "../../shared/constants"; import { LeftSidePanelContent } from "../molecules/LeftSidePanelContent"; diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 2769c0f1..514db44f 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -1,96 +1,188 @@ -import React, { useEffect, useState } from 'react'; -import { Button, MenuItem, Paper, Stack, Tabs, Tab } from "@mui/material"; -import { Dropdown as MuiDropdown } from '../atoms/DropdownMui'; +import React, { useState, useCallback } from 'react'; +import { Button, Paper, Box, TextField } from "@mui/material"; +import EditIcon from '@mui/icons-material/Edit'; +import TextFieldsIcon from '@mui/icons-material/TextFields'; +import DocumentScannerIcon from '@mui/icons-material/DocumentScanner'; import styled from "styled-components"; -import { 
ActionSettings } from "../molecules/ActionSettings"; -import { SelectChangeEvent } from "@mui/material/Select/Select"; import { SimpleBox } from "../atoms/Box"; import Typography from "@mui/material/Typography"; import { useGlobalInfoStore } from "../../context/globalInfo"; -import { PairForEdit } from "../../pages/RecordingPage"; +import { useActionContext } from '../../context/browserActions'; +import { useBrowserSteps } from '../../context/browserSteps'; +import { useSocketStore } from '../../context/socket'; +import { ScreenshotSettings } from '../../shared/types'; +import InputAdornment from '@mui/material/InputAdornment'; -interface RightSidePanelProps { - pairForEdit: PairForEdit; -} -export const RightSidePanel = ({pairForEdit}: RightSidePanelProps) => { +export const RightSidePanel = () => { + const [textLabels, setTextLabels] = useState<{ [id: number]: string }>({}); + const [errors, setErrors] = useState<{ [id: number]: string }>({}); + const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: number]: boolean }>({}); - const [content, setContent] = useState('action'); - const [action, setAction] = React.useState(''); - const [isSettingsDisplayed, setIsSettingsDisplayed] = React.useState(false); + const { lastAction, notify } = useGlobalInfoStore(); + const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot } = useActionContext(); + const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep } = useBrowserSteps(); + const { socket } = useSocketStore(); - const { lastAction } = useGlobalInfoStore(); - - const handleChange = (event: React.SyntheticEvent, newValue: string) => { - setContent(newValue); - }; - - const handleActionSelect = (event: SelectChangeEvent) => { - const { value } = event.target; - setAction(value); - setIsSettingsDisplayed(true); - }; - - useEffect(() => { - if (content !== 'detail' && pairForEdit.pair !== null) { - setContent('detail'); + const 
handleTextLabelChange = (id: number, label: string) => { + setTextLabels(prevLabels => ({ ...prevLabels, [id]: label })); + if (!label.trim()) { + setErrors(prevErrors => ({ ...prevErrors, [id]: 'Label cannot be empty' })); + } else { + setErrors(prevErrors => ({ ...prevErrors, [id]: '' })); } - }, [pairForEdit]) + }; + + const handleTextStepConfirm = (id: number) => { + const label = textLabels[id]?.trim(); + if (label) { + updateBrowserTextStepLabel(id, label); + setConfirmedTextSteps(prev => ({ ...prev, [id]: true })); + } else { + setErrors(prevErrors => ({ ...prevErrors, [id]: 'Label cannot be empty' })); + } + }; + + const handleTextStepDiscard = (id: number) => { + deleteBrowserStep(id); + setTextLabels(prevLabels => { + const { [id]: _, ...rest } = prevLabels; + return rest; + }); + setErrors(prevErrors => { + const { [id]: _, ...rest } = prevErrors; + return rest; + }); + }; + + const getTextSettingsObject = useCallback(() => { + const settings: Record = {}; + browserSteps.forEach(step => { + if (step.type === 'text' && step.label && step.selectorObj?.selector) { + settings[step.label] = step.selectorObj; + } + }); + return settings; + }, [browserSteps]); + + + const stopCaptureAndEmitGetTextSettings = useCallback(() => { + const hasUnconfirmedTextSteps = browserSteps.some(step => step.type === 'text' && !confirmedTextSteps[step.id]); + if (hasUnconfirmedTextSteps) { + notify('error', 'Please confirm no labels are empty'); + return; + } + stopGetText(); + const settings = getTextSettingsObject(); + const hasTextSteps = browserSteps.some(step => step.type === 'text'); + if (hasTextSteps) { + socket?.emit('action', { action: 'scrapeSchema', settings }); + } + }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps]); + + const captureScreenshot = (fullPage: boolean) => { + const screenshotSettings: ScreenshotSettings = { + fullPage, + type: 'png', + timeout: 30000, + animations: 'allow', + caret: 'hide', + scale: 'device', + }; + 
socket?.emit('action', { action: 'screenshot', settings: screenshotSettings }); + addScreenshotStep(fullPage); + stopGetScreenshot(); + }; return ( - + - - Last action: - {` ${lastAction}`} - + Last action: {` ${lastAction}`} - {content === 'action' ? ( - - Type of action: - - - click on coordinates - enqueueLinks - scrape - scrapeSchema - screenshot - script - scroll - - + + {!getText && !getScreenshot && } + {getText && + <> + + + + + + } - {isSettingsDisplayed && - + {!getText && !getScreenshot && } + {getScreenshot && ( + + + + + + )} + + + + {browserSteps.map(step => ( + + { + step.type === 'text' ? ( + <> + handleTextLabelChange(step.id, e.target.value)} + fullWidth + margin="normal" + error={!!errors[step.id]} + helperText={errors[step.id]} + InputProps={{ + readOnly: confirmedTextSteps[step.id], + startAdornment: ( + + + + ) + }} + /> + + + + ) + }} + /> + {!confirmedTextSteps[step.id] && ( + + + + + )} + + ) : ( + step.type === 'screenshot' && ( + + + + {`Take ${step.fullPage ? 
'Fullpage' : 'Visible Part'} Screenshot`} + + + ) + ) } - - ) - : null - } + + ))} + ); }; -const ActionTypeWrapper = styled.div` - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - margin-top: 20px; -`; - export const ActionDescription = styled.p` margin-left: 15px; `; diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx new file mode 100644 index 00000000..50d86777 --- /dev/null +++ b/src/context/browserActions.tsx @@ -0,0 +1,37 @@ +import React, { createContext, useContext, useState, ReactNode } from 'react'; + +interface ActionContextProps { + getText: boolean; + getScreenshot: boolean; + startGetText: () => void; + stopGetText: () => void; + startGetScreenshot: () => void; + stopGetScreenshot: () => void; +} + +const ActionContext = createContext(undefined); + +export const ActionProvider = ({ children }: { children: ReactNode }) => { + const [getText, setGetText] = useState(false); + const [getScreenshot, setGetScreenshot] = useState(false); + + const startGetText = () => setGetText(true); + const stopGetText = () => setGetText(false); + + const startGetScreenshot = () => setGetScreenshot(true); + const stopGetScreenshot = () => setGetScreenshot(false); + + return ( + + {children} + + ); +}; + +export const useActionContext = () => { + const context = useContext(ActionContext); + if (context === undefined) { + throw new Error('useActionContext must be used within an ActionProvider'); + } + return context; +}; \ No newline at end of file diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx new file mode 100644 index 00000000..e2984e53 --- /dev/null +++ b/src/context/browserSteps.tsx @@ -0,0 +1,85 @@ +import React, { createContext, useContext, useState } from 'react'; + +interface TextStep { + id: number; + type: 'text'; + label: string; + data: string; + selectorObj: SelectorObject; +} + +interface ScreenshotStep { + id: number; + type: 'screenshot'; + fullPage: boolean; +} 
+ + +type BrowserStep = TextStep | ScreenshotStep; + +interface SelectorObject { + selector: string; + tag?: string; + attribute?: string; + [key: string]: any; +} + +interface BrowserStepsContextType { + browserSteps: BrowserStep[]; + addTextStep: (label: string, data: string, selectorObj: SelectorObject) => void; + addScreenshotStep: (fullPage: boolean) => void; + deleteBrowserStep: (id: number) => void; + updateBrowserTextStepLabel: (id: number, newLabel: string) => void; +} + +const BrowserStepsContext = createContext(undefined); + +export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { + const [browserSteps, setBrowserSteps] = useState([]); + + const addTextStep = (label: string, data: string, selectorObj: SelectorObject) => { + setBrowserSteps(prevSteps => [ + ...prevSteps, + { id: Date.now(), type: 'text', label, data, selectorObj } + ]); + }; + + const addScreenshotStep = (fullPage: boolean) => { + setBrowserSteps(prevSteps => [ + ...prevSteps, + { id: Date.now(), type: 'screenshot', fullPage } + ]); + }; + + const deleteBrowserStep = (id: number) => { + setBrowserSteps(prevSteps => prevSteps.filter(step => step.id !== id)); + }; + + const updateBrowserTextStepLabel = (id: number, newLabel: string) => { + setBrowserSteps(prevSteps => + prevSteps.map(step => + step.id === id ? 
{ ...step, label: newLabel } : step + ) + ); + }; + + return ( + + {children} + + ); +}; + +export const useBrowserSteps = () => { + const context = useContext(BrowserStepsContext); + if (!context) { + throw new Error('useBrowserSteps must be used within a BrowserStepsProvider'); + } + return context; +}; diff --git a/src/index.css b/src/index.css index 21808ea5..1d4c3871 100644 --- a/src/index.css +++ b/src/index.css @@ -5,6 +5,12 @@ body { sans-serif; -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale; + width: 100%; + height: 100%; + margin: 0; + padding: 0; + scrollbar-gutter: stable; + overflow: hidden; } code { diff --git a/src/pages/PageWrappper.tsx b/src/pages/PageWrappper.tsx index 00e83a39..686d4cd4 100644 --- a/src/pages/PageWrappper.tsx +++ b/src/pages/PageWrappper.tsx @@ -15,7 +15,7 @@ export const PageWrapper = () => { const [recordingName, setRecordingName] = useState(''); const [open, setOpen] = useState(false); - const { browserId, setBrowserId, notification } = useGlobalInfoStore(); + const { browserId, setBrowserId, notification } = useGlobalInfoStore(); const handleNewRecording = () => { setBrowserId('new-recording'); @@ -27,15 +27,15 @@ export const PageWrapper = () => { setBrowserId('new-recording'); } - const isNotification = (): boolean=> { - if (notification.isOpen && !open){ + const isNotification = (): boolean => { + if (notification.isOpen && !open) { setOpen(true); } return notification.isOpen; } useEffect(() => { - const isRecordingInProgress = async() => { + const isRecordingInProgress = async () => { const id = await getActiveBrowserId(); if (id) { setBrowserId(id); @@ -48,26 +48,26 @@ export const PageWrapper = () => {
- - {browserId - ? ( - - - - - - - ) - : - } + + {browserId + ? ( + + + + + + + ) + : + } - { isNotification() ? + {isNotification() ? + message={notification.message} + isOpen={notification.isOpen} /> : null }
diff --git a/src/pages/RecordingPage.tsx b/src/pages/RecordingPage.tsx index b3bcedea..45b422b2 100644 --- a/src/pages/RecordingPage.tsx +++ b/src/pages/RecordingPage.tsx @@ -7,9 +7,11 @@ import { RightSidePanel } from "../components/organisms/RightSidePanel"; import { Loader } from "../components/atoms/Loader"; import { useSocketStore } from "../context/socket"; import { useBrowserDimensionsStore } from "../context/browserDimensions"; +import { ActionProvider } from "../context/browserActions" +import { BrowserStepsProvider } from '../context/browserSteps'; import { useGlobalInfoStore } from "../context/globalInfo"; import { editRecordingFromStorage } from "../api/storage"; -import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import { WhereWhatPair } from "maxun-core"; import styled from "styled-components"; interface RecordingPageProps { @@ -104,26 +106,30 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => { }, [socket, handleLoaded]); return ( -
- {isLoaded ? - - - - - - - - - - - - : } -
+ + +
+ {isLoaded ? + + + + + + + + + + + + : } +
+
+
); }; diff --git a/src/shared/constants.ts b/src/shared/constants.ts index d7ce0bd0..de805266 100644 --- a/src/shared/constants.ts +++ b/src/shared/constants.ts @@ -1,3 +1,3 @@ -import { WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WorkflowFile } from "maxun-core"; export const emptyWorkflow: WorkflowFile = { workflow: [] }; diff --git a/src/shared/types.ts b/src/shared/types.ts index 72e86a0e..aa5f254e 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -1,4 +1,4 @@ -import { WorkflowFile } from "@wbr-project/wbr-interpret"; +import { WorkflowFile } from "maxun-core"; import { Locator } from "playwright"; export type Workflow = WorkflowFile["workflow"]; @@ -15,7 +15,7 @@ export interface ScreenshotSettings { fullPage?: boolean; mask?: Locator[]; omitBackground?: boolean; - // is this still needed? - @wbr-project/wbr-interpret outputs to a binary output + // is this still needed? - maxun-core outputs to a binary output path?: string; quality?: number; scale?: "css" | "device";