Merge pull request #21 from amhsirak/develop
feat: browser actions revamp
This commit is contained in:
10
maxun-core/jest.config.js
Normal file
10
maxun-core/jest.config.js
Normal file
@@ -0,0 +1,10 @@
|
||||
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
|
||||
module.exports = {
|
||||
preset: 'ts-jest',
|
||||
testEnvironment: 'node',
|
||||
globals: {
|
||||
'ts-jest': {
|
||||
isolatedModules: true
|
||||
}
|
||||
}
|
||||
};
|
||||
29
maxun-core/package.json
Normal file
29
maxun-core/package.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"name": "maxun-core",
|
||||
"version": "0.0.1",
|
||||
"description": "Smart Workflow interpreter",
|
||||
"main": "build/index.js",
|
||||
"typings": "build/index.d.ts",
|
||||
"scripts": {
|
||||
"test": "jest",
|
||||
"build": "npm run clean && tsc",
|
||||
"lint": "eslint .",
|
||||
"clean": "rimraf ./build"
|
||||
},
|
||||
"files": [
|
||||
"build/*"
|
||||
],
|
||||
"keywords": [
|
||||
"web",
|
||||
"automation",
|
||||
"workflow",
|
||||
"interpret",
|
||||
"scraping"
|
||||
],
|
||||
"author": "Karishma Shukla",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"joi": "^17.6.0",
|
||||
"playwright": "^1.20.1"
|
||||
}
|
||||
}
|
||||
253
maxun-core/src/browserSide/scraper.js
Normal file
253
maxun-core/src/browserSide/scraper.js
Normal file
@@ -0,0 +1,253 @@
|
||||
/* eslint-disable @typescript-eslint/no-unused-vars */
|
||||
|
||||
/**
 * Layout area of an element: its `offsetHeight` multiplied by its `offsetWidth`.
 */
const area = ({ offsetHeight, offsetWidth }) => offsetHeight * offsetWidth;
|
||||
|
||||
function getBiggestElement(selector) {
|
||||
const elements = Array.from(document.querySelectorAll(selector));
|
||||
const biggest = elements.reduce(
|
||||
(max, elem) => (
|
||||
area(elem) > area(max) ? elem : max),
|
||||
{ offsetHeight: 0, offsetWidth: 0 },
|
||||
);
|
||||
return biggest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates structural selector (describing element by its DOM tree location).
|
||||
*
|
||||
* **The generated selector is not guaranteed to be unique!** (In fact, this is
|
||||
* the desired behaviour in here.)
|
||||
* @param {HTMLElement} element Element being described.
|
||||
* @returns {string} CSS-compliant selector describing the element's location in the DOM tree.
|
||||
*/
|
||||
function GetSelectorStructural(element) {
|
||||
// Base conditions for the recursive approach.
|
||||
if (element.tagName === 'BODY') {
|
||||
return 'BODY';
|
||||
}
|
||||
const selector = element.tagName;
|
||||
if (element.parentElement) {
|
||||
return `${GetSelectorStructural(element.parentElement)} > ${selector}`;
|
||||
}
|
||||
|
||||
return selector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Heuristic method to find collections of "interesting" items on the page.
|
||||
* @returns {Array<HTMLElement>} A collection of interesting DOM nodes
|
||||
* (online store products, plane tickets, list items... and many more?)
|
||||
*/
|
||||
function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') {
|
||||
const restoreScroll = (() => {
|
||||
const { scrollX, scrollY } = window;
|
||||
return () => {
|
||||
window.scrollTo(scrollX, scrollY);
|
||||
};
|
||||
})();
|
||||
|
||||
/**
|
||||
* @typedef {Array<{x: number, y: number}>} Grid
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns an array of grid-aligned {x,y} points.
|
||||
* @param {number} [granularity=0.005] sets the number of generated points
|
||||
* (the higher the granularity, the more points).
|
||||
* @returns {Grid} Array of {x, y} objects.
|
||||
*/
|
||||
function getGrid(startX = 0, startY = 0, granularity = 0.005) {
|
||||
const width = window.innerWidth;
|
||||
const height = window.innerHeight;
|
||||
|
||||
const out = [];
|
||||
for (let x = 0; x < width; x += 1 / granularity) {
|
||||
for (let y = 0; y < height; y += 1 / granularity) {
|
||||
out.push({ x: startX + x, y: startY + y });
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
let maxSelector = { selector: 'body', metric: 0 };
|
||||
|
||||
const updateMaximumWithPoint = (point) => {
|
||||
const currentElement = document.elementFromPoint(point.x, point.y);
|
||||
const selector = GetSelectorStructural(currentElement);
|
||||
|
||||
const elements = Array.from(document.querySelectorAll(selector))
|
||||
.filter((element) => area(element) > minArea);
|
||||
|
||||
// If the current selector targets less than three elements,
|
||||
// we consider it not interesting (would be a very underwhelming scraper)
|
||||
if (elements.length < 3) {
|
||||
return;
|
||||
}
|
||||
|
||||
let metric = null;
|
||||
|
||||
if (metricType === 'total_area') {
|
||||
metric = elements
|
||||
.reduce((p, x) => p + area(x), 0);
|
||||
} else if (metricType === 'size_deviation') {
|
||||
// This could use a proper "statistics" approach... but meh, so far so good!
|
||||
const sizes = elements
|
||||
.map((element) => area(element));
|
||||
|
||||
metric = (1 - (Math.max(...sizes) - Math.min(...sizes)) / Math.max(...sizes));
|
||||
}
|
||||
|
||||
// console.debug(`Total ${metricType} is ${metric}.`)
|
||||
if (metric > maxSelector.metric && elements.length < maxCountPerPage) {
|
||||
maxSelector = { selector, metric };
|
||||
}
|
||||
};
|
||||
|
||||
for (let scroll = 0; scroll < scrolls; scroll += 1) {
|
||||
window.scrollTo(0, scroll * window.innerHeight);
|
||||
|
||||
const grid = getGrid();
|
||||
|
||||
grid.forEach(updateMaximumWithPoint);
|
||||
}
|
||||
|
||||
restoreScroll();
|
||||
|
||||
let out = Array.from(document.querySelectorAll(maxSelector.selector));
|
||||
|
||||
const different = (x, i, a) => a.findIndex((e) => e === x) === i;
|
||||
// as long as we don't merge any two elements by substituing them for their parents,
|
||||
// we substitute.
|
||||
while (out.map((x) => x.parentElement).every(different)
|
||||
&& out.forEach((x) => x.parentElement !== null)) {
|
||||
out = out.map((x) => x.parentElement ?? x);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// The public browser-side helpers are defined inside an IIFE (Immediately Invoked
// Function Expression) and attached to the `window` object it receives.
(function (window) {
  /**
   * Returns a "scrape" result from the current page.
   *
   * Every record becomes one flat object: `img_<n>` keys hold image URLs and
   * `record_<nnnn>` keys hold the trimmed lines of the record's `innerText`.
   * The rows are returned as collected; nothing is filtered out here.
   * @param {?string} [selector=null] CSS selector of the records; when omitted,
   * the records are picked by `scrapableHeuristics()`.
   * @returns {Array<Object>} One object per record.
   */
  window.scrape = function (selector = null) {
    const records = selector
      ? Array.from(document.querySelectorAll(selector))
      : scrapableHeuristics();

    return records.map((record) => {
      const row = {};

      Array.from(record.querySelectorAll('img')).forEach((img, i) => {
        /**
         * Contains the last entry of `srcset` - if `srcset` is not present, contains
         * URL from the `src` attribute
         *
         * If the `src` attribute contains a data url, imgUrl contains `undefined`.
         */
        let imgUrl;
        if (img.srcset) {
          const candidates = img.srcset.split(', ');
          // Each candidate is "<url> <descriptor>"; keep only the URL part.
          [imgUrl] = candidates[candidates.length - 1].split(' ');
        } else if (img.src.indexOf('data:') === -1) {
          imgUrl = img.src;
        }

        if (imgUrl) {
          row[`img_${i}`] = imgUrl;
        }
      });

      record.innerText.split('\n').forEach((line, i) => {
        row[`record_${String(i).padStart(4, '0')}`] = line.trim();
      });

      return row;
    });
  };

  /**
   * Given an object with named lists of elements,
   * groups the elements by their distance in the DOM tree.
   *
   * The list matching the most elements is the "seed". Each seed element is widened to the
   * largest ancestor that still contains no other seed element; every such ancestor yields
   * one output row, holding for each named list the value of its first element inside that
   * ancestor (`undefined` when there is none).
   * @param {Object.<string, {selector: string, tag: string, attribute: string}>} lists
   * The named lists of HTML elements. `attribute` selects what is read: `href` and `src`
   * read the attribute, `textContent` reads the property, anything else reads `innerText`.
   * @returns {Array.<Object.<string, string>>} One row per seed element; empty when `lists`
   * has no entries.
   */
  window.scrapeSchema = function (lists) {
    const fields = Object.entries(lists ?? {});
    // Without any field there is no seed list to build rows from.
    if (fields.length === 0) {
      return [];
    }

    // Query every selector exactly once; the results are reused for all rows.
    const matchesByKey = new Map(
      fields.map(([key, { selector }]) => [key, Array.from(document.querySelectorAll(selector))]),
    );

    // The seed is the first field whose selector matches the most elements.
    const maxLength = Math.max(...fields.map(([key]) => matchesByKey.get(key).length));
    const [seedName] = fields.find(([key]) => matchesByKey.get(key).length === maxLength);
    const seedElements = matchesByKey.get(seedName);

    // For every seed element, climb while its parent contains exactly one seed element.
    function getMBEs(elements) {
      const isUniqueChild = (e) => elements
        .filter((elem) => e.parentNode?.contains(elem))
        .length === 1;

      return elements.map((element) => {
        let candidate = element;
        while (candidate && isUniqueChild(candidate)) {
          candidate = candidate.parentNode;
        }
        return candidate;
      });
    }

    function readValue(elem, attribute) {
      switch (attribute) {
        case 'href':
        case 'src':
          return elem.getAttribute(attribute);
        case 'textContent':
          return elem.textContent;
        default:
          // Covers 'innerText' as well as a missing/unknown attribute.
          return elem.innerText;
      }
    }

    return getMBEs(seedElements).map((mbe) => Object.fromEntries(
      fields.map(([key, { attribute }]) => {
        const elem = matchesByKey.get(key).find((candidate) => mbe.contains(candidate));
        return [key, elem ? readValue(elem, attribute) : undefined];
      }),
    ));
  };
})(window);
|
||||
8
maxun-core/src/index.ts
Normal file
8
maxun-core/src/index.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import Interpreter from './interpret';
|
||||
|
||||
export default Interpreter;
|
||||
export { default as Preprocessor } from './preprocessor';
|
||||
export type {
|
||||
WorkflowFile, WhereWhatPair, Where, What,
|
||||
} from './types/workflow';
|
||||
export { unaryOperators, naryOperators, meta as metaOperators } from './types/logic';
|
||||
457
maxun-core/src/interpret.ts
Normal file
457
maxun-core/src/interpret.ts
Normal file
@@ -0,0 +1,457 @@
|
||||
/* eslint-disable no-await-in-loop, no-restricted-syntax */
|
||||
import { Page, PageScreenshotOptions } from 'playwright';
|
||||
import path from 'path';
|
||||
|
||||
import { EventEmitter } from 'events';
|
||||
import {
|
||||
Where, What, PageState, Workflow, WorkflowFile,
|
||||
ParamType, SelectorArray, CustomFunctions,
|
||||
} from './types/workflow';
|
||||
|
||||
import { operators, meta } from './types/logic';
|
||||
import { arrayToObject } from './utils/utils';
|
||||
import Concurrency from './utils/concurrency';
|
||||
import Preprocessor from './preprocessor';
|
||||
import log, { Level } from './utils/logger';
|
||||
|
||||
/**
 * Defines optional interpreter options (passed in constructor).
 */
interface InterpreterOptions {
  /** How many times in a row the same action may match before the run loop gives up. */
  maxRepeats: number;
  /** Passed to the `Concurrency` manager that schedules the run loops. */
  maxConcurrency: number;
  /** Receives the results of the `scrape` and `scrapeSchema` actions. */
  serializableCallback: (output: any) => (void | Promise<void>);
  /** Receives binary results (screenshots) together with their MIME type. */
  binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
  /** When set, the page state is logged on every iteration of the run loop. */
  debug: boolean;
  /** Optional hooks: the index of the matched action, and non-LOG level log messages. */
  debugChannel: {
    activeId?: Function;
    debugMessage?: Function;
  };
}
|
||||
|
||||
/**
 * Class for running the Smart Workflows.
 *
 * One instance wraps one validated workflow. `run` repeatedly reads the page state, finds the
 * first where-what pair whose `where` condition applies and carries out its `what` steps.
 */
export default class Interpreter extends EventEmitter {
  private workflow: Workflow;

  private initializedWorkflow: Workflow | null;

  private options: InterpreterOptions;

  private concurrency: Concurrency;

  /** Non-null exactly while a workflow is running; the run loops exit once it is cleared. */
  private stopper: Function | null = null;

  private log: typeof log;

  /**
   * @param workflow Workflow file to interpret; it is validated with `Preprocessor.validateWorkflow`.
   * @param options Overrides for the default interpreter options.
   * @throws The validation error, when the workflow does not pass validation.
   */
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
    super();
    this.workflow = workflow.workflow;
    this.initializedWorkflow = null;
    this.options = {
      maxRepeats: 5,
      maxConcurrency: 5,
      serializableCallback: (data) => { log(JSON.stringify(data), Level.WARN); },
      binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); },
      debug: false,
      debugChannel: {},
      ...options,
    };
    this.concurrency = new Concurrency(this.options.maxConcurrency);
    this.log = (...args) => log(...args);

    const error = Preprocessor.validateWorkflow(workflow);
    if (error) {
      throw (error);
    }

    if (this.options.debugChannel?.debugMessage) {
      const oldLog = this.log;
      // Mirror every non-LOG level message into the debug channel before logging it as usual.
      // @ts-ignore
      this.log = (...args: Parameters<typeof oldLog>) => {
        if (args[1] !== Level.LOG) {
          this.options.debugChannel.debugMessage!(typeof args[0] === 'string' ? args[0] : args[0].message);
        }
        oldLog(...args);
      };
    }
  }

  /**
   * Returns the context object from given Page and the current workflow.\
   * \
   * `workflow` is used for selector extraction - function searches for used selectors to
   * look for later in the page's context.
   * @param page Playwright Page object
   * @param workflow Current **initialized** workflow (array of where-what pairs).
   * @returns {PageState} State of the current page.
   */
  private async getState(page: Page, workflow: Workflow): Promise<PageState> {
    /**
     * All the selectors present in the current Workflow
     */
    const selectors = Preprocessor.extractSelectors(workflow);

    /**
     * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability).
     * @param selector Selector to be queried
     * @returns True if the targetted element is enabled and visible, false otherwise
     * (including when either check throws).
     */
    const actionable = async (selector: string): Promise<boolean> => {
      try {
        const [enabled, visible] = await Promise.all([
          page.isEnabled(selector, { timeout: 500 }),
          page.isVisible(selector, { timeout: 500 }),
        ]);
        return enabled && visible;
      } catch (e) {
        // A failed check is treated as "not actionable" on purpose.
        return false;
      }
    };

    /**
     * Selectors present (actionable) in the current page, in workflow order.
     */
    const flags = await Promise.all(selectors.map((selector) => actionable(selector)));
    const presentSelectors: SelectorArray = selectors.filter((_, index) => flags[index]);

    const cookies = await page.context().cookies([page.url()]);

    return {
      url: page.url(),
      cookies: Object.fromEntries(cookies.map((cookie) => [cookie.name, cookie.value])),
      selectors: presentSelectors,
    };
  }

  /**
   * Tests if the given action is applicable with the given context.
   * @param where Tested *where* condition
   * @param context Current browser context.
   * @param usedActions Ids of the actions carried out so far (for `$before` / `$after`);
   * they are passed down to conditions nested inside `$and`, `$or` and `$not`.
   * @returns True if `where` is applicable in the given context, false otherwise
   * @throws {Error} When a key listed as an operator or meta operator has no handler.
   */
  private applicable(where: Where, context: PageState, usedActions: string[] = []): boolean {
    /**
     * Given two arbitrary objects, determines whether `subset` is a subset of `superset`.\
     * \
     * For every key in `subset`, there must be a corresponding key with equal scalar
     * value in `superset`, or `inclusive(subset[key], superset[key])` must hold.
     * A RegExp in `subset` is tested against the corresponding `superset` value.
     * @param subset Arbitrary non-cyclic JS object (where clause)
     * @param superset Arbitrary non-cyclic JS object (browser context)
     * @returns `true` if `subset <= superset`, `false` otherwise.
     */
    const inclusive = (subset: Record<string, unknown>, superset: Record<string, unknown>)
    : boolean => Object.entries(subset).every(([key, value]) => {
      // Arrays are compared without order (are transformed into objects before comparison).
      const expected: unknown = Array.isArray(value) ? arrayToObject(value) : value;
      const rawActual = superset[key];
      const actual: unknown = Array.isArray(rawActual) ? arrayToObject(<any>rawActual) : rawActual;

      // Every `subset` key must exist in the `superset` (with a truthy value)...
      if (!actual) {
        return false;
      }
      // ...and have the same value (strict equality),
      if (actual === expected) {
        return true;
      }
      // or match the expected pattern,
      if (expected instanceof RegExp) {
        return expected.test(<string>actual);
      }
      // or subset[key] <= superset[key]. A null/undefined expectation matches nothing.
      return typeof expected === 'object' && expected !== null
        && inclusive(<typeof subset>expected, <typeof superset>actual);
    });

    // Every value in the "where" object should be compliant to the current state.
    return Object.entries(where).every(([key, value]) => {
      if (operators.includes(<any>key)) {
        // Every condition is treated as a single context: an object operand is split
        // into one single-key condition per entry.
        const operands = (): Where[] => (Array.isArray(value)
          ? value as Where[]
          : Object.entries(value).map((entry) => Object.fromEntries([entry])));

        switch (key) {
          case '$and':
            return operands().every((x) => this.applicable(x, context, usedActions));
          case '$or':
            return operands().some((x) => this.applicable(x, context, usedActions));
          case '$not':
            // $not should be a unary operator
            return !this.applicable(<Where>value, context, usedActions);
          default:
            throw new Error('Undefined logic operator.');
        }
      }

      if (meta.includes(<any>key)) {
        // A string must equal the action id, anything else is used as a RegExp.
        const matchesValue = (id: string): boolean => (typeof value === 'string'
          ? id === value
          : (<RegExp><unknown>value).test(id));

        switch (key) {
          case '$before':
            return !usedActions.find(matchesValue);
          case '$after':
            return !!usedActions.find(matchesValue);
          default:
            throw new Error('Undefined meta operator.');
        }
      }

      // Current key is a base condition (url, cookies, selectors)
      return inclusive({ [key]: value }, context);
    });
  }

  /**
   * Given a Playwright's page object and a "declarative" list of actions, this function
   * calls all mentioned functions on the Page object.\
   * \
   * Actions named in `wawActions` are handled by the interpreter itself; any other action is
   * resolved (with dot notation) as a method reachable from the Page object. A pause of 500 ms
   * follows every step.
   * @param page Playwright Page object
   * @param steps Array of actions.
   */
  private async carryOutSteps(page: Page, steps: What[]): Promise<void> {
    /**
     * Defines overloaded (or added) methods/actions usable in the workflow.
     * If a method overloads any existing method of the Page class, it accepts the same set
     * of parameters *(but can override some!)*\
     * \
     * Also, following piece of code defines functions to be run in the browser's context.
     * Beware of false linter errors - here, we know better!
     */
    const wawActions: Record<CustomFunctions, (...args: any[]) => void> = {
      // Takes a screenshot (never written to disk) and hands it to `binaryCallback`.
      screenshot: async (params: PageScreenshotOptions) => {
        const screenshotBuffer = await page.screenshot({
          ...params, path: undefined,
        });
        await this.options.binaryCallback(screenshotBuffer, 'image/png');
      },
      // Schedules one run loop per link found under `selector`, then closes the current page.
      enqueueLinks: async (selector: string) => {
        const links: string[] = await page.locator(selector)
          .evaluateAll(
            // @ts-ignore
            (elements) => elements.map((a) => a.href).filter((x) => x),
          );
        const context = page.context();

        for (const link of links) {
          // eslint-disable-next-line
          this.concurrency.addJob(async () => {
            try {
              const newPage = await context.newPage();
              await newPage.goto(link);
              await newPage.waitForLoadState('networkidle');
              await this.runLoop(newPage, this.initializedWorkflow!);
            } catch (e) {
              // `runLoop` uses soft mode, so it recovers from it's own exceptions
              // but newPage(), goto() and waitForLoadState() don't (and will kill
              // the interpreter by throwing).
              this.log(<Error>e, Level.ERROR);
            }
          });
        }
        await page.close();
      },
      // Runs the browser-side `window.scrape` and forwards the result.
      scrape: async (selector?: string) => {
        await this.ensureScriptsLoaded(page);

        const scrapeResults: Record<string, string>[] = await page.evaluate((s) => window.scrape(s ?? null), selector);
        await this.options.serializableCallback(scrapeResults);
      },

      // Runs the browser-side `window.scrapeSchema` and forwards the result.
      scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; }>) => {
        await this.ensureScriptsLoaded(page);

        const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
        await this.options.serializableCallback(scrapeResult);
      },

      // Scrolls down by `pages` viewport heights (one by default).
      scroll: async (pages?: number) => {
        await page.evaluate(async (pagesInternal) => {
          for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
            // @ts-ignore
            window.scrollTo(0, window.scrollY + window.innerHeight);
          }
        }, pages ?? 1);
      },
      // NOTE(review): compiles and executes workflow-supplied code in the interpreter's
      // process with access to `page` and `log` - only run workflows from trusted sources.
      script: async (code: string) => {
        const AsyncFunction: FunctionConstructor = Object.getPrototypeOf(
          async () => { },
        ).constructor;
        const x = new AsyncFunction('page', 'log', code);
        await x(page, this.log);
      },
      // Emits `flag` and waits until a listener calls the supplied resolver.
      flag: async () => new Promise((res) => {
        this.emit('flag', page, res);
      }),
    };

    for (const step of steps) {
      this.log(`Launching ${step.action}`, Level.LOG);

      if (step.action in wawActions) {
        // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
        const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
        await wawActions[step.action as CustomFunctions](...(params ?? []));
      } else {
        // Implements the dot notation for the "method name" in the workflow
        const levels = step.action.split('.');
        const methodName = levels[levels.length - 1];

        let invokee: any = page;
        for (const level of levels.slice(0, -1)) {
          invokee = invokee[level];
        }

        if (!step.args || Array.isArray(step.args)) {
          await invokee[methodName](...(step.args ?? []));
        } else {
          await invokee[methodName](step.args);
        }
      }

      await new Promise((res) => { setTimeout(res, 500); });
    }
  }

  /**
   * Interprets `workflow` on page `p` until the page is closed, the interpreter is stopped,
   * no action matches the page state, or the same action has matched `maxRepeats` times in a row.
   * @param p Playwright Page object
   * @param workflow Initialized workflow (array of where-what pairs).
   */
  private async runLoop(p: Page, workflow: Workflow) {
    const usedActions: string[] = [];
    let lastAction = null;
    let repeatCount = 0;

    /**
     * Enables the interpreter functionality for popup windows.
     * User-requested concurrency should be entirely managed by the concurrency manager,
     * e.g. via `enqueueLinks`.
     */
    p.on('popup', (popup) => {
      this.concurrency.addJob(() => this.runLoop(popup, workflow));
    });

    /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
    while (true) {
      // Checks whether the page was closed from outside,
      // or the workflow execution has been stopped via `interpreter.stop()`
      if (p.isClosed() || !this.stopper) {
        return;
      }

      try {
        await p.waitForLoadState();
      } catch (e) {
        await p.close();
        return;
      }

      let pageState: PageState;
      try {
        pageState = await this.getState(p, workflow);
      } catch (e: any) {
        this.log('The browser has been closed.');
        return;
      }

      if (this.options.debug) {
        this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN);
      }

      // The first where-what pair applicable to the current state wins.
      const actionId = workflow.findIndex(
        (step) => this.applicable(step.where, pageState, usedActions),
      );
      const action = workflow[actionId];

      this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG);

      if (!action) {
        return;
      }

      if (this.options.debugChannel?.activeId) {
        this.options.debugChannel.activeId(actionId);
      }

      repeatCount = action === lastAction ? repeatCount + 1 : 0;
      // A falsy `maxRepeats` disables the repetition limit.
      if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) {
        return;
      }
      lastAction = action;

      try {
        await this.carryOutSteps(p, action.what);
        // Only actions whose steps all succeeded are recorded as used.
        usedActions.push(action.id ?? 'undefined');
      } catch (e) {
        this.log(<Error>e, Level.ERROR);
      }
    }
  }

  /**
   * Registers the browser-side scraper script on `page`, unless `window.scrape` and
   * `window.scrapeSchema` are already defined there.
   *
   * NOTE(review): Playwright documents `addInitScript` as running on navigation and frame
   * attachment - confirm the helpers are also available on an already loaded document.
   * @param page Playwright Page object
   */
  private async ensureScriptsLoaded(page: Page) {
    const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function');
    if (!isScriptLoaded) {
      await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
    }
  }

  /**
   * Runs the workflow on the given page.
   * \
   * Resolves after the playback is finished.
   * @param {Page} [page] Page to run the workflow on.
   * @param {ParamType} params Workflow specific, set of parameters
   * for the `{$param: nameofparam}` fields.
   * @throws {Error} When this interpreter is already running a workflow.
   */
  public async run(page: Page, params?: ParamType): Promise<void> {
    if (this.stopper) {
      throw new Error('This Interpreter is already running a workflow. To run another workflow, please, spawn another Interpreter.');
    }
    /**
     * `this.workflow` with the parameters initialized.
     */
    this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params);

    await this.ensureScriptsLoaded(page);

    this.stopper = () => {
      this.stopper = null;
    };

    try {
      this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow!));

      await this.concurrency.waitForCompletion();
    } finally {
      // Also reset on failure, otherwise the interpreter would stay "running" forever.
      this.stopper = null;
    }
  }

  /**
   * Requests the running workflow to stop; the run loops exit on their next iteration.
   * @throws {Error} When no workflow is running.
   */
  public async stop(): Promise<void> {
    if (this.stopper) {
      await this.stopper();
      this.stopper = null;
    } else {
      throw new Error('Cannot stop, there is no running workflow!');
    }
  }
}
|
||||
179
maxun-core/src/preprocessor.ts
Normal file
179
maxun-core/src/preprocessor.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import Joi from 'joi';
|
||||
import {
|
||||
Workflow, WorkflowFile, ParamType, SelectorArray, Where,
|
||||
} from './types/workflow';
|
||||
import { operators } from './types/logic';
|
||||
|
||||
/**
 * Class for static processing the workflow files/objects.
 */
export default class Preprocessor {
  /**
   * Validates the structure of a workflow file against the Joi schema defined here.
   * @param workflow Workflow file to check.
   * @returns The `error` reported by Joi's `validate` (`undefined` for a valid workflow).
   */
  static validateWorkflow(workflow: WorkflowFile): any {
    const regex = Joi.object({
      $regex: Joi.string().required(),
    });

    // The where clause is recursive: logic operators nest further where clauses.
    const whereSchema = Joi.object({
      url: [Joi.string().uri(), regex],
      selectors: Joi.array().items(Joi.string()),
      cookies: Joi.object({}).pattern(Joi.string(), Joi.string()),
      $after: [Joi.string(), regex],
      $before: [Joi.string(), regex],
      $and: Joi.array().items(Joi.link('#whereSchema')),
      $or: Joi.array().items(Joi.link('#whereSchema')),
      $not: Joi.link('#whereSchema'),
    }).id('whereSchema');

    const schema = Joi.object({
      meta: Joi.object({
        name: Joi.string(),
        desc: Joi.string(),
      }),
      workflow: Joi.array().items(
        Joi.object({
          id: Joi.string(),
          where: whereSchema.required(),
          what: Joi.array().items({
            action: Joi.string().required(),
            args: Joi.array().items(Joi.any()),
          }).required(),
        }),
      ).required(),
    });

    const { error } = schema.validate(workflow);

    return error;
  }

  /**
   * Extracts parameter names from the workflow.
   * @param {WorkflowFile} workflow The given workflow
   * @returns {String[]} List of parameters' names, one entry per `{$param: name}` occurrence
   * (a parameter used several times is listed several times).
   */
  static getParams(workflow: WorkflowFile): string[] {
    const collect = (node: unknown): string[] => {
      // Primitives and null hold no parameters.
      if (node === null || typeof node !== 'object') {
        return [];
      }

      // Recursion base case
      const { $param } = node as { $param?: string };
      if ($param) {
        return [$param];
      }

      // Recursion general case
      return Object.values(node as object).flatMap((child) => collect(child));
    };

    return collect(workflow.workflow);
  }

  /**
   * List all the selectors used in the given workflow (only literal "selector"
   * field in WHERE clauses so far)
   * @returns Every selector once, in order of first appearance.
   */
  // TODO : add recursive selector search (also in click/fill etc. events?)
  static extractSelectors(workflow: Workflow): SelectorArray {
    /**
     * Given a Where condition, this function extracts
     * all the existing selectors from it (recursively).
     */
    const selectorsFromCondition = (where: Where): SelectorArray => {
      // the `selectors` field is either on the top level
      let found = where.selectors ?? [];
      if (!Array.isArray(found)) {
        found = [found];
      }

      // or nested in the "operator" array
      operators.forEach((op) => {
        let nested = where[op];
        if (nested) {
          nested = Array.isArray(nested) ? nested : [nested];
          (nested).forEach((subWhere) => {
            found = [...found, ...selectorsFromCondition(subWhere)];
          });
        }
      });

      return found;
    };

    // Collect the selectors of all steps; the Set drops repeated selectors
    // (within one step as well as across steps) while keeping first-seen order.
    return Array.from(new Set(
      workflow.flatMap((step) => selectorsFromCondition(step.where)),
    ));
  }

  /**
   * Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects
   * with the defined value. Afterwards, `{$regex: pattern}` objects are turned into RegExp instances.
   * @param workflow Workflow to initialize.
   * @param params Values for the workflow parameters; the set of keys must equal the set of
   * parameter names used in the workflow.
   * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
   * @throws {Error} When the provided parameter names differ from the ones the workflow uses.
   */
  static initWorkflow(workflow: Workflow, params?: ParamType): Workflow {
    // A parameter may be referenced many times; compare the distinct names only.
    const expected = Array.from(new Set(Preprocessor.getParams({ workflow }))).sort().join(',');
    const provided = Object.keys(params ?? {}).sort().join(',');

    if (provided !== expected) {
      throw new Error([
        'Provided parameters do not match the workflow parameters',
        `provided: ${provided},`,
        `expected: ${expected}`,
        '',
      ].join('\n'));
    }

    /**
     * A recursive method for initializing special `{key: value}` syntax objects in the workflow.
     * The object is modified in place.
     * @param object Workflow to initialize (or a part of it).
     * @param k key to look for ($regex, $param)
     * @param f function mutating the special `{}` syntax into
     * its true representation (RegExp...)
     * @returns Updated object
     */
    const initSpecialRecurse = (
      object: unknown,
      k: string,
      f: (value: string) => unknown,
    ): unknown => {
      if (!object || typeof object !== 'object') {
        return object;
      }

      const node = object as Record<string, unknown>;
      // for every key (child) of the object
      Object.keys(node).forEach((key) => {
        const child = node[key];
        // if the field is an object with only one key, which is `k`
        // (null and primitives are never special and are skipped by the recursion)
        const isSpecial = child !== null && typeof child === 'object'
          && Object.keys(child as object).length === 1 && (<any>child)[k];

        if (isSpecial) {
          // process the current special tag (init param, hydrate regex...)
          node[key] = f((<any>child)[k]);
        } else {
          initSpecialRecurse(child, k, f);
        }
      });
      return node;
    };

    // TODO: do better deep copy, this is hideous.
    let workflowCopy = JSON.parse(JSON.stringify(workflow));

    if (params) {
      workflowCopy = initSpecialRecurse(
        workflowCopy,
        '$param',
        (paramName) => {
          // Presence is what matters: 0, '' and false are legitimate parameter values.
          if (Object.prototype.hasOwnProperty.call(params, paramName)) {
            return params[paramName];
          }
          throw new SyntaxError(`Unspecified parameter found ${paramName}.`);
        },
      );
    }

    workflowCopy = initSpecialRecurse(
      workflowCopy,
      '$regex',
      (regex) => new RegExp(regex),
    );

    return <Workflow>workflowCopy;
  }
}
|
||||
5
maxun-core/src/types/logic.ts
Normal file
5
maxun-core/src/types/logic.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
/** Logic operators that take a single nested condition. */
export const unaryOperators = ['$not'] as const;

/** Logic operators that take a list of nested conditions. */
export const naryOperators = ['$and', '$or'] as const;

/** Every logic operator key: the unary ones followed by the n-ary ones. */
export const operators = [
  ...unaryOperators,
  ...naryOperators,
] as const;

/** Meta condition keys, matched against the ids of already executed actions. */
export const meta = ['$before', '$after'] as const;
|
||||
58
maxun-core/src/types/workflow.ts
Normal file
58
maxun-core/src/types/workflow.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import { Page } from 'playwright';
|
||||
import {
|
||||
naryOperators, unaryOperators, operators, meta,
|
||||
} from './logic';
|
||||
|
||||
/** Any logic operator key (`$not`, `$and`, `$or`). */
export type Operator = typeof operators[number];

/** Logic operator key taking a single condition. */
export type UnaryOperator = typeof unaryOperators[number];

/** Logic operator key taking an array of conditions. */
export type NAryOperator = typeof naryOperators[number];

/** Meta condition key (`$before`, `$after`). */
export type Meta = typeof meta[number];

/** List of selector strings. */
export type SelectorArray = string[];

/** Either a literal string or a `{ $regex }` object holding a regular expression source. */
type RegexableString = string | { '$regex': string };

/** Conditions describing a page state, plus the meta conditions. */
type BaseConditions = {
  'url': RegexableString,
  'cookies': Record<string, RegexableString>,
  // (CSS/Playwright) selectors use their own logic, there is no reason
  // (and several technical difficulties) to allow regular expression notation
  'selectors': SelectorArray,
} & Record<Meta, RegexableString>;

/** Condition part of a workflow step; every key is optional and conditions may nest. */
export type Where =
  Partial<{ [key in NAryOperator]: Where[] }> // either a logic operator (arity N)
  & Partial<{ [key in UnaryOperator]: Where }> // or an unary operator
  & Partial<BaseConditions>; // or one of the base conditions

/** Names of the members of `T` whose value is a function. */
type MethodNames<T> = {
  [K in keyof T]: T[K] extends Function ? K : never;
}[keyof T];

/** Action names implemented by the interpreter itself rather than by Playwright's `Page`. */
export type CustomFunctions =
  | 'scrape'
  | 'scrapeSchema'
  | 'scroll'
  | 'screenshot'
  | 'script'
  | 'enqueueLinks'
  | 'flag';

/** A single action: a `Page` method name or a custom function, with optional arguments. */
export type What = {
  action: MethodNames<Page> | CustomFunctions,
  args?: any[]
};

/** Snapshot of a page expressed with the base condition keys. */
export type PageState = Partial<BaseConditions>;

/** Parameter values keyed by parameter name. */
export type ParamType = Record<string, any>;

/** Optional descriptive information about a workflow file. */
export type MetaData = {
  name?: string,
  desc?: string,
};

/** One workflow step: a condition (`where`) and the actions to perform (`what`). */
export interface WhereWhatPair {
  id?: string
  where: Where
  what: What[]
}

/** Ordered list of workflow steps. */
export type Workflow = WhereWhatPair[];

/** Serialized workflow together with its optional metadata. */
export type WorkflowFile = {
  meta?: MetaData,
  workflow: Workflow
};
|
||||
85
maxun-core/src/utils/concurrency.ts
Normal file
85
maxun-core/src/utils/concurrency.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
 * Concurrency class for running concurrent tasks while managing a limited amount of resources.
 *
 * A job that rejects (or throws) is treated as finished: its worker slot is released and the
 * next waiting job is started. Jobs are expected to handle and report their own errors.
 */
export default class Concurrency {
  /**
   * Maximum number of workers running in parallel. If falsy (`0`/`null`), there is no limit.
   */
  maxConcurrency: number = 1;

  /**
   * Number of currently active workers.
   */
  activeWorkers: number = 0;

  /**
   * Jobs waiting to be started.
   */
  private jobQueue: (() => Promise<any>)[] = [];

  /**
   * "Resolve" callbacks of the pending waitForCompletion() promises.
   */
  private waiting: (() => void)[] = [];

  /**
   * Constructs a new instance of concurrency manager.
   * @param {number} maxConcurrency Maximum number of workers running in parallel.
   */
  constructor(maxConcurrency: number) {
    this.maxConcurrency = maxConcurrency;
  }

  /**
   * Takes the most recently added waiting job and runs it; when it settles, the same worker
   * continues with the next waiting job. With no job left the worker retires, and once the
   * last worker retires every pending waitForCompletion() promise is resolved.
   */
  private runNextJob(): void {
    const job = this.jobQueue.pop();

    if (!job) {
      this.activeWorkers -= 1;
      if (this.activeWorkers === 0) {
        // Empty the list while notifying so resolved callbacks are not kept (and re-called).
        this.waiting.splice(0).forEach((resolve) => resolve());
      }
      return;
    }

    const next = () => this.runNextJob();

    let running: Promise<unknown>;
    try {
      running = Promise.resolve(job());
    } catch (e) {
      // A synchronous throw must release the worker just like a rejection does.
      running = Promise.reject(e);
    }

    // Continue on success AND failure - otherwise a failing job would leak its worker slot
    // and waitForCompletion() would never resolve.
    running.then(next, next);
  }

  /**
   * Pass a job (a time-demanding async function) to the concurrency manager. \
   * The time of the job's execution depends on the concurrency manager itself
   * (given a generous enough `maxConcurrency` value, it might be immediate,
   * but this is not guaranteed).
   * @param job Async function to be executed (job to be processed).
   */
  addJob(job: () => Promise<any>): void {
    this.jobQueue.push(job);

    if (!this.maxConcurrency || this.activeWorkers < this.maxConcurrency) {
      // Register the worker before it starts so the bookkeeping never dips below reality.
      this.activeWorkers += 1;
      this.runNextJob();
    }
    // Otherwise there is no capacity now; a running worker picks the job up later.
  }

  /**
   * Waits until there is no running nor waiting job. \
   * If the concurrency manager is idle at the time of calling this function,
   * it waits until at least one job is completed (can be "presubscribed").
   * @returns Promise, resolved after there is no running/waiting worker.
   */
  waitForCompletion(): Promise<void> {
    return new Promise((res) => {
      this.waiting.push(res);
    });
  }
}
|
||||
30
maxun-core/src/utils/logger.ts
Normal file
30
maxun-core/src/utils/logger.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Logger class for more detailed and comprehensible logs (with colors and timestamps)
|
||||
*/
|
||||
|
||||
/**
 * Log levels. The numeric values are interpolated into `\x1b[<value>m` escape sequences
 * by `logger`; `DATE` styles the timestamp and `RESET` ends the styling.
 */
export enum Level {
  DATE = 36,
  LOG = 0,
  WARN = 93,
  ERROR = 31,
  DEBUG = 95,
  RESET = 0,
}

/**
 * Writes one timestamped, colored line.
 *
 * WARN and ERROR lines go to stderr, everything else to stdout. The whole line
 * (timestamp, color codes, text and trailing newline) is written to a single stream,
 * so the output stays intact when the two streams are redirected separately.
 *
 * @param message Text to log, or an Error whose `message` is logged.
 * @param level Level selecting the color and the target stream (defaults to `Level.LOG`).
 */
export default function logger(
  message: string | Error,
  level: (Level.LOG | Level.WARN | Level.ERROR | Level.DEBUG) = Level.LOG,
) {
  const text = typeof message === 'string' ? message : message.message;
  const stream = (level === Level.ERROR || level === Level.WARN)
    ? process.stderr
    : process.stdout;

  const timestamp = `\x1b[${Level.DATE}m[${(new Date()).toLocaleString()}]\x1b[0m `;
  stream.write(`${timestamp}\x1b[${level}m${text}\x1b[${Level.RESET}m\n`);
}
|
||||
13
maxun-core/src/utils/utils.ts
Normal file
13
maxun-core/src/utils/utils.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* ESLint rule in case there is only one util function
|
||||
* (it still does not represent the "utils" file)
|
||||
*/
|
||||
|
||||
/* eslint-disable import/prefer-default-export */
|
||||
|
||||
/**
 * Builds an object whose keys are the **items** of `array`; every key maps to a
 * fresh empty array.
 */
export function arrayToObject(array : any[]) {
  const keyed: any = {};
  array.forEach((item) => {
    keyed[item] = [];
  });
  return keyed;
}
|
||||
11
maxun-core/tsconfig.json
Normal file
11
maxun-core/tsconfig.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"outDir": "./build",
|
||||
"declaration": true,
|
||||
"allowJs": true,
|
||||
"target": "es5",
|
||||
"module": "commonjs",
|
||||
"esModuleInterop": true
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
459
mx-interpreter/interpret.ts
Normal file
459
mx-interpreter/interpret.ts
Normal file
@@ -0,0 +1,459 @@
|
||||
/* eslint-disable no-await-in-loop, no-restricted-syntax */
|
||||
import { Page, PageScreenshotOptions } from 'playwright';
|
||||
import path from 'path';
|
||||
|
||||
import { EventEmitter } from 'events';
|
||||
import {
|
||||
Where, What, PageState, Workflow, WorkflowFile,
|
||||
ParamType, SelectorArray, CustomFunctions,
|
||||
} from './types/workflow';
|
||||
|
||||
import { operators, meta } from './types/logic';
|
||||
import { arrayToObject } from './utils/utils';
|
||||
import Concurrency from './utils/concurrency';
|
||||
import Preprocessor from './preprocessor';
|
||||
import log, { Level } from './utils/logger';
|
||||
|
||||
/**
 * Options accepted by the Interpreter constructor (every field may be omitted there;
 * the constructor fills in defaults).
 */
interface InterpreterOptions {
  /** Limit on consecutive repetitions of the same matched action (falsy disables the limit). */
  maxRepeats: number;
  /** Passed to the concurrency manager as the number of parallel workers. */
  maxConcurrency: number;
  /** Receives serializable results (e.g. scraped data). */
  serializableCallback: (output: any) => void | Promise<void>;
  /** Receives binary results together with their MIME type (e.g. screenshots). */
  binaryCallback: (output: any, mimeType: string) => void | Promise<void>;
  /** When true, the current page state is logged on every loop iteration. */
  debug: boolean;
  /** Optional hooks notified about the active step index and about log messages. */
  debugChannel: {
    activeId?: Function,
    debugMessage?: Function,
  }
}
|
||||
|
||||
/**
 * Class for running the Smart Workflows.
 *
 * An instance wraps one workflow; `run()` repeatedly matches the current page state
 * against the workflow's `where` conditions and carries out the matched step's actions.
 */
export default class Interpreter extends EventEmitter {
  /** Workflow as supplied to the constructor (special objects not yet resolved). */
  private workflow: Workflow;

  /** Result of `Preprocessor.initWorkflow`, set by `run()`. */
  private initializedWorkflow: Workflow | null;

  private options: InterpreterOptions;

  private concurrency: Concurrency;

  /** Non-null while a workflow is running; the run loops exit once it is null. */
  private stopper: Function | null = null;

  private log: typeof log;

  /**
   * @param workflow Workflow file to interpret.
   * @param options Overrides for the default interpreter options.
   * @throws The validation error if `workflow` does not pass `Preprocessor.validateWorkflow`.
   */
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
    super();
    this.workflow = workflow.workflow;
    this.initializedWorkflow = null;
    this.options = {
      maxRepeats: 5,
      maxConcurrency: 5,
      serializableCallback: (data) => { log(JSON.stringify(data), Level.WARN); },
      binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); },
      debug: false,
      debugChannel: {},
      ...options,
    };
    this.concurrency = new Concurrency(this.options.maxConcurrency);
    this.log = (...args) => log(...args);

    const error = Preprocessor.validateWorkflow(workflow);
    if (error) {
      throw (error);
    }

    if (this.options.debugChannel?.debugMessage) {
      // Mirror every message whose level is not Level.LOG into the debug channel.
      const oldLog = this.log;
      // @ts-ignore
      this.log = (...args: Parameters<typeof oldLog>) => {
        if (args[1] !== Level.LOG) {
          this.options.debugChannel.debugMessage!(typeof args[0] === 'string' ? args[0] : args[0].message);
        }
        oldLog(...args);
      };
    }
  }

  /**
   * Returns the context object from given Page and the current workflow.\
   * \
   * `workflow` is used for selector extraction - function searches for used selectors to
   * look for later in the page's context.
   * @param page Playwright Page object
   * @param workflow Current **initialized** workflow (array of where-what pairs).
   * @returns {PageState} State of the current page.
   */
  private async getState(page: Page, workflow: Workflow): Promise<PageState> {
    /**
     * All the selectors present in the current Workflow
     */
    const selectors = Preprocessor.extractSelectors(workflow);

    /**
     * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability).
     * @param selector Selector to be queried
     * @returns True if the targetted element is enabled and visible, false otherwise
     * (including when either check throws).
     */
    const actionable = async (selector: string): Promise<boolean> => {
      try {
        const proms = [
          page.isEnabled(selector, { timeout: 500 }),
          page.isVisible(selector, { timeout: 500 }),
        ];

        return await Promise.all(proms).then((bools) => bools.every((x) => x));
      } catch (e) {
        // log(<Error>e, Level.ERROR);
        return false;
      }
    };

    /**
     * Selectors from the workflow that are actionable in the current page.
     */
    const presentSelectors: SelectorArray = await Promise.all(
      selectors.map(async (selector) => {
        if (await actionable(selector)) {
          return [selector];
        }
        return [];
      }),
    ).then((x) => x.flat());

    return {
      url: page.url(),
      cookies: (await page.context().cookies([page.url()]))
        .reduce((p, cookie) => (
          {
            ...p,
            [cookie.name]: cookie.value,
          }), {}),
      selectors: presentSelectors,
    };
  }

  /**
   * Tests if the given action is applicable with the given context.
   * @param where Tested *where* condition
   * @param context Current browser context.
   * @param usedActions Ids of the actions already carried out; consulted by the
   * `$before` / `$after` meta conditions (also when nested in logic operators).
   * @returns True if `where` is applicable in the given context, false otherwise
   */
  private applicable(where: Where, context: PageState, usedActions: string[] = []): boolean {
    /**
     * Given two arbitrary objects, determines whether `subset` is a subset of `superset`.\
     * \
     * For every key in `subset`, there must be a corresponding key with equal scalar
     * value in `superset`, or `inclusive(subset[key], superset[key])` must hold.
     * @param subset Arbitrary non-cyclic JS object (where clause)
     * @param superset Arbitrary non-cyclic JS object (browser context)
     * @returns `true` if `subset <= superset`, `false` otherwise.
     */
    const inclusive = (subset: Record<string, unknown>, superset: Record<string, unknown>)
    : boolean => (
      Object.entries(subset).every(
        ([key, value]) => {
          /**
           * Arrays are compared without order (are transformed into objects before comparison).
           */
          const parsedValue = Array.isArray(value) ? arrayToObject(value) : value;

          const parsedSuperset: Record<string, unknown> = {};
          parsedSuperset[key] = Array.isArray(superset[key])
            ? arrayToObject(<any>superset[key])
            : superset[key];

          // Every `subset` key must exist in the `superset` and
          // have the same value (strict equality), or subset[key] <= superset[key]
          return parsedSuperset[key]
            && (
              (parsedSuperset[key] === parsedValue)
              || ((parsedValue).constructor.name === 'RegExp' && (<RegExp>parsedValue).test(<string>parsedSuperset[key]))
              || (
                (parsedValue).constructor.name !== 'RegExp'
                && typeof parsedValue === 'object' && inclusive(<typeof subset>parsedValue, <typeof superset>parsedSuperset[key])
              )
            );
        },
      )
    );

    // Every value in the "where" object should be compliant to the current state.
    return Object.entries(where).every(
      ([key, value]) => {
        if (operators.includes(<any>key)) {
          const array = Array.isArray(value)
            ? value as Where[]
            : Object.entries(value).map((a) => Object.fromEntries([a]));
          // every condition is treated as a single context

          // `usedActions` is forwarded so nested `$before` / `$after` conditions
          // are evaluated against the real action history.
          switch (key as keyof typeof operators) {
            case '$and':
              return array?.every((x) => this.applicable(x, context, usedActions));
            case '$or':
              return array?.some((x) => this.applicable(x, context, usedActions));
            case '$not':
              return !this.applicable(<Where>value, context, usedActions); // $not should be a unary operator
            default:
              throw new Error('Undefined logic operator.');
          }
        } else if (meta.includes(<any>key)) {
          // Matches an action id against `value` (a literal string or a RegExp).
          const testRegexString = (x: string) => {
            if (typeof value === 'string') {
              return x === value;
            }

            return (<RegExp><unknown>value).test(x);
          };

          switch (key as keyof typeof meta) {
            case '$before':
              return !usedActions.find(testRegexString);
            case '$after':
              return !!usedActions.find(testRegexString);
            default:
              throw new Error('Undefined meta operator.');
          }
        } else {
          // Current key is a base condition (url, cookies, selectors)
          return inclusive({ [key]: value }, context);
        }
      },
    );
  }

  /**
   * Given a Playwright's page object and a "declarative" list of actions, this function
   * calls all mentioned functions on the Page object.\
   * \
   * Manipulates the iterator indexes (experimental feature, likely to be removed in
   * the following versions of waw-interpreter)
   * @param page Playwright Page object
   * @param steps Array of actions.
   */
  private async carryOutSteps(page: Page, steps: What[]): Promise<void> {
    /**
     * Defines overloaded (or added) methods/actions usable in the workflow.
     * If a method overloads any existing method of the Page class, it accepts the same set
     * of parameters *(but can override some!)*\
     * \
     * Also, following piece of code defines functions to be run in the browser's context.
     * Beware of false linter errors - here, we know better!
     */
    const wawActions: Record<CustomFunctions, (...args: any[]) => void> = {
      // Takes a screenshot (never written to disk, `path` is overridden) and hands it over.
      screenshot: async (params: PageScreenshotOptions) => {
        const screenshotBuffer = await page.screenshot({
          ...params, path: undefined,
        });
        await this.options.binaryCallback(screenshotBuffer, 'image/png');
      },
      // Schedules a run of the workflow for every link matched by `selector`,
      // then closes the current page.
      enqueueLinks: async (selector: string) => {
        const links: string[] = await page.locator(selector)
          .evaluateAll(
            // @ts-ignore
            (elements) => elements.map((a) => a.href).filter((x) => x),
          );
        const context = page.context();

        for (const link of links) {
          // eslint-disable-next-line
          this.concurrency.addJob(async () => {
            try {
              const newPage = await context.newPage();
              await newPage.goto(link);
              await newPage.waitForLoadState('networkidle');
              await this.runLoop(newPage, this.initializedWorkflow!);
            } catch (e) {
              // `runLoop` uses soft mode, so it recovers from it's own exceptions
              // but newPage(), goto() and waitForLoadState() don't (and will kill
              // the interpreter by throwing).
              this.log(<Error>e, Level.ERROR);
            }
          });
        }
        await page.close();
      },
      // Runs the browser-side `scrape` function and reports its result.
      scrape: async (selector?: string) => {
        const scrapeResults: Record<string, string>[] = <any>await page
          // eslint-disable-next-line
          // @ts-ignore
          .evaluate((s) => scrape(s ?? null), selector);
        await this.options.serializableCallback(scrapeResults);
      },
      // Resolves every selector of `schema` to element handles, runs the browser-side
      // `scrapeSchema` function on them and reports its result.
      scrapeSchema: async (schema: Record<string, string>) => {
        const handleLists = await Promise.all(
          Object.values(schema).map((selector) => page.$$(selector)),
        );

        const namedHandleLists = Object.fromEntries(
          Object.keys(schema).map((key, i) => [key, handleLists[i]]),
        );

        const scrapeResult = await page.evaluate((n) => scrapeSchema(n), namedHandleLists);

        // Awaited (like in `scrape`) so an async callback finishes - and its errors
        // surface - before the next step starts.
        await this.options.serializableCallback(scrapeResult);
      },
      // Scrolls down by `pages` viewport heights (one by default).
      scroll: async (pages?: number) => {
        await page.evaluate(async (pagesInternal) => {
          for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
            // @ts-ignore
            window.scrollTo(0, window.scrollY + window.innerHeight);
          }
        }, pages ?? 1);
      },
      // Executes `code` as the body of an async function receiving `page` and `log`.
      // NOTE(review): this runs workflow-supplied code inside the interpreter process -
      // workflows must come from a trusted source.
      script: async (code: string) => {
        const AsyncFunction: FunctionConstructor = Object.getPrototypeOf(
          async () => { },
        ).constructor;
        const x = new AsyncFunction('page', 'log', code);
        await x(page, this.log);
      },
      // Emits a 'flag' event and waits until a listener calls the passed resolve callback.
      flag: async () => new Promise((res) => {
        this.emit('flag', page, res);
      }),
    };

    for (const step of steps) {
      this.log(`Launching ${step.action}`, Level.LOG);

      if (step.action in wawActions) {
        // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
        const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
        await wawActions[step.action as CustomFunctions](...(params ?? []));
      } else {
        // Implements the dot notation for the "method name" in the workflow
        const levels = step.action.split('.');
        const methodName = levels[levels.length - 1];

        let invokee: any = page;
        for (const level of levels.splice(0, levels.length - 1)) {
          invokee = invokee[level];
        }

        if (!step.args || Array.isArray(step.args)) {
          await (<any>invokee[methodName])(...(step.args ?? []));
        } else {
          await (<any>invokee[methodName])(step.args);
        }
      }

      // Fixed 500 ms pause after every step.
      await new Promise((res) => { setTimeout(res, 500); });
    }
  }

  /**
   * Main interpreter loop for one page: reads the page state, finds the first workflow
   * step whose `where` condition applies and carries out its actions. Returns when no
   * step applies, the page is closed, the interpreter is stopped, or the same step was
   * matched `maxRepeats` times in a row.
   * @param p Page to run the workflow on.
   * @param workflow Initialized workflow.
   */
  private async runLoop(p: Page, workflow: Workflow) {
    const usedActions: string[] = [];
    let lastAction = null;
    let repeatCount = 0;

    /**
     * Enables the interpreter functionality for popup windows.
     * User-requested concurrency should be entirely managed by the concurrency manager,
     * e.g. via `enqueueLinks`.
     */
    p.on('popup', (popup) => {
      this.concurrency.addJob(() => this.runLoop(popup, workflow));
    });

    /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
    while (true) {
      // Checks whether the page was closed from outside,
      // or the workflow execution has been stopped via `interpreter.stop()`
      if (p.isClosed() || !this.stopper) {
        return;
      }

      try {
        await p.waitForLoadState();
      } catch (e) {
        await p.close();
        return;
      }

      let pageState = {};
      try {
        pageState = await this.getState(p, workflow);
      } catch (e: any) {
        this.log('The browser has been closed.');
        return;
      }

      if (this.options.debug) {
        this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN);
      }
      const actionId = workflow.findIndex(
        (step) => this.applicable(step.where, pageState, usedActions),
      );

      const action = workflow[actionId];

      this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG);

      if (action) { // action is matched
        if (this.options.debugChannel?.activeId) {
          this.options.debugChannel.activeId(actionId);
        }

        repeatCount = action === lastAction ? repeatCount + 1 : 0;
        if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) {
          return;
        }
        lastAction = action;

        try {
          await this.carryOutSteps(p, action.what);
          usedActions.push(action.id ?? 'undefined');
        } catch (e) {
          // Soft mode: a failing step is logged and the loop goes on.
          this.log(<Error>e, Level.ERROR);
        }
      } else {
        return;
      }
    }
  }

  /**
   * Runs the workflow on the given page.
   * \
   * Resolves after the playback is finished.
   * @param {Page} [page] Page to run the workflow on.
   * @param {ParamType} params Workflow specific, set of parameters
   *  for the `{$param: nameofparam}` fields.
   * @throws {Error} If this interpreter is already running a workflow.
   */
  public async run(page: Page, params?: ParamType): Promise<void> {
    if (this.stopper) {
      throw new Error('This Interpreter is already running a workflow. To run another workflow, please, spawn another Interpreter.');
    }
    /**
     * `this.workflow` with the parameters initialized.
     */
    this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params);

    // @ts-ignore
    if (await page.evaluate(() => !<any>window.scrape)) {
      // Awaited so a failure to register the script rejects `run()` instead of
      // becoming an unhandled rejection.
      await page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
    }

    this.stopper = () => {
      this.stopper = null;
    };

    this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow!));

    await this.concurrency.waitForCompletion();

    this.stopper = null;
  }

  /**
   * Requests the running workflow to stop; the run loops exit at their next iteration.
   * @throws {Error} If no workflow is running.
   */
  public async stop(): Promise<void> {
    if (this.stopper) {
      await this.stopper();
      this.stopper = null;
    } else {
      throw new Error('Cannot stop, there is no running workflow!');
    }
  }
}
|
||||
179
mx-interpreter/preprocessor.ts
Normal file
179
mx-interpreter/preprocessor.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import Joi from 'joi';
|
||||
import {
|
||||
Workflow, WorkflowFile, ParamType, SelectorArray, Where,
|
||||
} from './types/workflow';
|
||||
import { operators } from './types/logic';
|
||||
|
||||
/**
 * Class for static processing the workflow files/objects.
 */
export default class Preprocessor {
  /**
   * Validates the structure of a workflow file.
   * @param workflow Workflow file to validate.
   * @returns The Joi validation error, or `undefined` when the workflow is valid.
   */
  static validateWorkflow(workflow: WorkflowFile): any {
    const regex = Joi.object({
      $regex: Joi.string().required(),
    });

    const whereSchema = Joi.object({
      url: [Joi.string().uri(), regex],
      selectors: Joi.array().items(Joi.string()),
      cookies: Joi.object({}).pattern(Joi.string(), Joi.string()),
      $after: [Joi.string(), regex],
      $before: [Joi.string(), regex],
      $and: Joi.array().items(Joi.link('#whereSchema')),
      $or: Joi.array().items(Joi.link('#whereSchema')),
      $not: Joi.link('#whereSchema'),
    }).id('whereSchema');

    const schema = Joi.object({
      meta: Joi.object({
        name: Joi.string(),
        desc: Joi.string(),
      }),
      workflow: Joi.array().items(
        Joi.object({
          id: Joi.string(),
          where: whereSchema.required(),
          what: Joi.array().items({
            action: Joi.string().required(),
            args: Joi.array().items(Joi.any()),
          }).required(),
        }),
      ).required(),
    });

    const { error } = schema.validate(workflow);

    return error;
  }

  /**
   * Extracts parameter names from the workflow.
   * @param {WorkflowFile} workflow The given workflow
   * @returns {String[]} List of parameters' names, one entry per `{ $param }` occurrence
   * (a name referenced several times is listed several times).
   */
  static getParams(workflow: WorkflowFile): string[] {
    const getParamsRecurse = (object: any): string[] => {
      // `typeof null` is 'object' too, so null has to be excluded explicitly.
      if (object !== null && typeof object === 'object') {
        // Recursion base case
        if (object.$param) {
          return [object.$param];
        }

        // Recursion general case
        return Object.values(object)
          .reduce((p: string[], v: any): string[] => [...p, ...getParamsRecurse(v)], []);
      }
      return [];
    };

    return getParamsRecurse(workflow.workflow);
  }

  /**
   * List all the selectors used in the given workflow (only literal "selector"
   * field in WHERE clauses so far)
   * @returns Selectors in order of first appearance, without duplicates.
   */
  // TODO : add recursive selector search (also in click/fill etc. events?)
  static extractSelectors(workflow: Workflow): SelectorArray {
    /**
     * Given a Where condition, this function extracts
     * all the existing selectors from it (recursively).
     */
    const selectorsFromCondition = (where: Where): SelectorArray => {
      // the `selectors` field is either on the top level
      let out = where.selectors ?? [];
      if (!Array.isArray(out)) {
        out = [out];
      }

      // or nested in the "operator" array
      operators.forEach((op) => {
        let condWhere = where[op];
        if (condWhere) {
          condWhere = Array.isArray(condWhere) ? condWhere : [condWhere];
          (condWhere).forEach((subWhere) => {
            out = [...out, ...selectorsFromCondition(subWhere)];
          });
        }
      });

      return out;
    };

    // Iterate through all the steps and collect each selector once - duplicates are
    // dropped both across steps and within a single step.
    const unique: SelectorArray = [];
    workflow.forEach((step) => {
      selectorsFromCondition(step.where).forEach((selector) => {
        if (!unique.includes(selector)) {
          unique.push(selector);
        }
      });
    });
    return unique;
  }

  /**
   * Builds an initialized copy of `workflow`: every `{ $param: name }` object is replaced
   * with the matching value from `params` and every `{ $regex: source }` object is replaced
   * with a `RegExp` instance.
   * @param workflow Workflow to initialize (left untouched, a deep copy is modified).
   * @param params Values for the parameters referenced by the workflow.
   * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
   * @throws {Error} If the names in `params` differ from the names used by the workflow.
   * @throws {SyntaxError} If a referenced parameter has no value in `params`.
   */
  static initWorkflow(workflow: Workflow, params?: ParamType): Workflow {
    // A parameter may be referenced from several places in the workflow,
    // so only the unique names are compared against the provided ones.
    const paramNames = this.getParams({ workflow })
      .filter((name, index, all) => all.indexOf(name) === index)
      .sort();
    const providedNames = Object.keys(params ?? {}).sort();

    if (providedNames.join(',') !== paramNames.join(',')) {
      throw new Error(`Provided parameters do not match the workflow parameters
        provided: ${providedNames.join(',')},
        expected: ${paramNames.join(',')}
      `);
    }

    /**
     * A recursive method for initializing special `{key: value}` syntax objects in the workflow.
     * Mutates `object` in place.
     * @param object Workflow to initialize (or a part of it).
     * @param k key to look for ($regex, $param)
     * @param f function mutating the special `{}` syntax into
     * its true representation (RegExp...)
     * @returns Updated object
     */
    const initSpecialRecurse = (
      object: unknown,
      k: string,
      f: (value: string) => unknown,
    ): unknown => {
      if (!object || typeof object !== 'object') {
        return object;
      }

      const container = <Record<string, any>>object;
      Object.keys(container).forEach((key) => {
        const child = container[key];
        // `null` children (e.g. a `null` action argument) must not reach Object.keys().
        const isSpecial = child !== null
          && typeof child === 'object'
          && Object.keys(child).length === 1
          && child[k];

        if (isSpecial) {
          // process the current special tag (init param, hydrate regex...)
          container[key] = f(child[k]);
        } else {
          initSpecialRecurse(child, k, f);
        }
      });
      return container;
    };

    // TODO: do better deep copy, this is hideous.
    let workflowCopy = JSON.parse(JSON.stringify(workflow));

    if (params) {
      workflowCopy = initSpecialRecurse(
        workflowCopy,
        '$param',
        (paramName) => {
          // Compare against `undefined` so falsy values (0, '', false) stay usable.
          if (params[paramName] !== undefined) {
            return params[paramName];
          }
          throw new SyntaxError(`Unspecified parameter found ${paramName}.`);
        },
      );
    }

    workflowCopy = initSpecialRecurse(
      workflowCopy,
      '$regex',
      (regex) => new RegExp(regex),
    );

    return <Workflow>workflowCopy;
  }
}
|
||||
@@ -271,6 +271,7 @@ const handleChangeUrl = async (generator: WorkflowGenerator, page: Page, url: st
|
||||
try {
|
||||
await page.goto(url);
|
||||
logger.log('debug', `Went to ${url}`);
|
||||
console.log(`Went to ${url}`)
|
||||
} catch (e) {
|
||||
const { message } = e as Error;
|
||||
logger.log('error', message);
|
||||
|
||||
235
server/src/routes/storage.ts
Normal file
235
server/src/routes/storage.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* RESTful API endpoints handling the recording storage.
|
||||
*/
|
||||
|
||||
import { Router } from 'express';
|
||||
import logger from "../logger";
|
||||
import { deleteFile, readFile, readFiles, saveFile } from "../workflow-management/storage";
|
||||
import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller";
|
||||
import { chromium } from "playwright";
|
||||
import { browserPool } from "../server";
|
||||
import fs from "fs";
|
||||
import { uuid } from "uuidv4";
|
||||
|
||||
export const router = Router();
|
||||
|
||||
/**
 * Middleware registered on path '/' for every HTTP method: writes a debug
 * log entry with the requested URL and then yields to the next handler.
 */
router.all('/', (request, _response, proceed) => {
  const invokedUrl = request.url;
  logger.log('debug', `The recordings API was invoked: ${invokedUrl}`);
  // hand the request over to whichever handler comes next
  proceed();
});
|
||||
|
||||
/**
 * GET endpoint returning all stored recordings.
 *
 * Sends whatever `readFiles` yields for the recordings directory; sends
 * `null` (after logging) when anything in that process throws.
 */
router.get('/recordings', async (_request, response) => {
  const recordingsDir = './../storage/recordings/';
  try {
    return response.send(await readFiles(recordingsDir));
  } catch (e) {
    logger.log('info', 'Error while reading recordings');
    return response.send(null);
  }
});
|
||||
|
||||
/**
 * DELETE endpoint for deleting a recording from the storage.
 *
 * Removes `<fileName>.waw.json` from the recordings directory.
 * Responds with `true` when the file was deleted and `false` when the
 * deletion failed (the failure, including its cause, is logged).
 */
router.delete('/recordings/:fileName', async (req, res) => {
  const recordingFile = `${req.params.fileName}.waw.json`;
  try {
    await deleteFile(`./../storage/recordings/${recordingFile}`);
    return res.send(true);
  } catch (e) {
    // Include the underlying cause so a failed deletion can be diagnosed from the log.
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while deleting a recording with name: ${recordingFile}: ${message}`);
    return res.send(false);
  }
});
|
||||
|
||||
/**
 * GET endpoint returning all stored runs.
 *
 * Sends whatever `readFiles` yields for the runs directory; sends `null`
 * (after logging) when anything in that process throws.
 */
router.get('/runs', async (_request, response) => {
  const runsDir = './../storage/runs/';
  try {
    return response.send(await readFiles(runsDir));
  } catch (e) {
    logger.log('info', 'Error while reading runs');
    return response.send(null);
  }
});
|
||||
|
||||
/**
 * DELETE endpoint for deleting a run from the storage.
 *
 * Removes `<fileName>.json` from the runs directory.
 * Responds with `true` when the file was deleted and `false` when the
 * deletion failed (the failure, including its cause, is logged).
 */
router.delete('/runs/:fileName', async (req, res) => {
  const runFile = `${req.params.fileName}.json`;
  try {
    await deleteFile(`./../storage/runs/${runFile}`);
    return res.send(true);
  } catch (e) {
    // Include the underlying cause so a failed deletion can be diagnosed from the log.
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while deleting a run with name: ${runFile}: ${message}`);
    return res.send(false);
  }
});
|
||||
|
||||
/**
 * PUT endpoint for starting a remote browser instance and saving run metadata to the storage.
 * Making it ready for interpretation and returning a runId.
 *
 * The request body is stored verbatim as the run's `interpreterSettings`.
 * The metadata is written to `../storage/runs/<fileName>_<runId>.json`.
 * Responds with `{ browserId, runId }` on success and with an empty string
 * on failure (the failure, including its cause, is logged).
 */
router.put('/runs/:fileName', async (req, res) => {
  try {
    const id = createRemoteBrowserForRun({
      browser: chromium,
      launchOptions: { headless: true }
    });

    const runId = uuid();
    // Single source of truth for the file name, used for both saving and logging.
    const runFile = `${req.params.fileName}_${runId}.json`;

    const run_meta = {
      status: 'RUNNING',
      name: req.params.fileName,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      duration: '',
      task: req.body.params ? 'task' : '',
      browserId: id,
      interpreterSettings: req.body,
      log: '',
      runId,
    };

    fs.mkdirSync('../storage/runs', { recursive: true });
    await saveFile(
      `../storage/runs/${runFile}`,
      JSON.stringify(run_meta, null, 2)
    );
    logger.log('debug', `Created run with name: ${runFile}`);
    return res.send({
      browserId: id,
      runId,
    });
  } catch (e) {
    // The run id may not exist yet at this point, so only the recording name is reported.
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while creating a run with name: ${req.params.fileName}.json: ${message}`);
    return res.send('');
  }
});
|
||||
|
||||
/**
 * GET endpoint returning a single stored run.
 *
 * Reads `<fileName>_<runId>.json` from the runs directory and sends its
 * parsed JSON content; sends `null` (after logging the error) on failure.
 */
router.get('/runs/run/:fileName/:runId', async (req, res) => {
  const { fileName, runId } = req.params;
  try {
    // load the serialized run and hand it back as an object
    const rawRun = await readFile(`./../storage/runs/${fileName}_${runId}.json`);
    return res.send(JSON.parse(rawRun));
  } catch (e) {
    const failure = e as Error;
    logger.log('error', `Error ${failure.message} while reading a run with name: ${fileName}_${runId}.json`);
    return res.send(null);
  }
});
|
||||
|
||||
/**
 * POST endpoint for interpreting a stored recording in the run's remote
 * browser, then finishing the run and saving it to the storage.
 *
 * Steps: load the recording and the run metadata, interpret the recording on
 * the browser's current page, destroy the remote browser, and overwrite the
 * run file with the result, log, outputs and timing information.
 *
 * Responds with `true` once the finished run has been saved and `false` on
 * any failure (the failure, including its cause, is logged).
 */
router.post('/runs/run/:fileName/:runId', async (req, res) => {
  try {
    // read the recording from storage
    const recording = await readFile(`./../storage/recordings/${req.params.fileName}.waw.json`);
    const parsedRecording = JSON.parse(recording);
    // read the run from storage
    const run = await readFile(`./../storage/runs/${req.params.fileName}_${req.params.runId}.json`);
    const parsedRun = JSON.parse(run);

    // interpret the run in active browser
    const browser = browserPool.getRemoteBrowser(parsedRun.browserId);
    const currentPage = browser?.getCurrentPage();
    if (!browser || !currentPage) {
      throw new Error(`No active browser page found for run ${req.params.runId}`);
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      parsedRecording.recording, currentPage, parsedRun.interpreterSettings);

    // NOTE(review): startedAt is written with toLocaleString(), so parsing it back
    // depends on the server locale; an unparseable value yields NaN here.
    const duration = Math.round((new Date().getTime() - new Date(parsedRun.startedAt).getTime()) / 1000);
    // Whole minutes plus the remaining seconds (e.g. 90 -> "1 m 30 s").
    const durString = duration < 60
      ? `${duration} s`
      : `${Math.floor(duration / 60)} m ${duration % 60} s`;

    await destroyRemoteBrowser(parsedRun.browserId);

    const run_meta = {
      ...parsedRun,
      status: interpretationInfo.result,
      finishedAt: new Date().toLocaleString(),
      duration: durString,
      browserId: null,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: interpretationInfo.binaryOutput,
    };

    fs.mkdirSync('../storage/runs', { recursive: true });
    await saveFile(
      `../storage/runs/${parsedRun.name}_${req.params.runId}.json`,
      JSON.stringify(run_meta, null, 2)
    );
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while running a recording with name: ${req.params.fileName}_${req.params.runId}.json: ${message}`);
    return res.send(false);
  }
});
|
||||
|
||||
/**
 * POST endpoint for aborting a current interpretation of the run.
 *
 * Reads the run metadata, collects the interpreter's current debug log and
 * the data gathered so far from the run's remote browser (if it is still in
 * the pool), and overwrites the run file with status `ABORTED`.
 *
 * Responds with `true` once the aborted run has been saved and `false` on
 * any failure (the failure, including its cause, is logged).
 */
router.post('/runs/abort/:fileName/:runId', async (req, res) => {
  try {
    // read the run from storage
    const run = await readFile(`./../storage/runs/${req.params.fileName}_${req.params.runId}.json`);
    const parsedRun = JSON.parse(run);

    // get current log, one message per line (same separator the finishing endpoint uses)
    const browser = browserPool.getRemoteBrowser(parsedRun.browserId);
    const currentLog = browser?.interpreter.debugMessages.join('\n');
    // index each collected item under an `item-<index>` key
    const serializableOutput = browser?.interpreter.serializableData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});
    const binaryOutput = browser?.interpreter.binaryData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});

    const run_meta = {
      ...parsedRun,
      status: 'ABORTED',
      finishedAt: null,
      duration: '',
      browserId: null,
      log: currentLog,
    };

    fs.mkdirSync('../storage/runs', { recursive: true });
    await saveFile(
      `../storage/runs/${parsedRun.name}_${req.params.runId}.json`,
      JSON.stringify({ ...run_meta, serializableOutput, binaryOutput }, null, 2)
    );
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while aborting a run with name: ${req.params.fileName}_${req.params.runId}.json: ${message}`);
    return res.send(false);
  }
});
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Action, ActionType, Coordinates, TagName } from "../../types";
|
||||
import { WhereWhatPair, WorkflowFile } from '@wbr-project/wbr-interpret';
|
||||
import { WhereWhatPair, WorkflowFile } from 'maxun-core';
|
||||
import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
@@ -484,9 +484,9 @@ export class WorkflowGenerator {
|
||||
public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => {
|
||||
const rect = await getRect(page, coordinates);
|
||||
const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click);
|
||||
//console.log('Backend Rectangle:', rect)
|
||||
const elementInfo = await getElementInformation(page, coordinates);
|
||||
if (rect) {
|
||||
this.socket.emit('highlighter', { rect, selector: displaySelector });
|
||||
this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import Interpreter, { WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import Interpreter, { WorkflowFile } from "maxun-core";
|
||||
import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
@@ -8,7 +8,7 @@ import { InterpreterSettings } from "../../types";
|
||||
* This class implements the main interpretation functions.
|
||||
* It holds some information about the current interpretation process and
|
||||
* registers to some events to allow the client (frontend) to interact with the interpreter.
|
||||
* It uses the [@wbr-project/wbr-interpret](https://www.npmjs.com/package/@wbr-project/wbr-interpret)
|
||||
* It uses the [maxun-core](https://www.npmjs.com/package/maxun-core)
|
||||
* library to interpret the workflow.
|
||||
* @category WorkflowManagement
|
||||
*/
|
||||
@@ -26,7 +26,7 @@ export class WorkflowInterpreter {
|
||||
|
||||
/**
|
||||
* The instance of the {@link Interpreter} class used to interpret the workflow.
|
||||
* From @wbr-project/wbr-interpret.
|
||||
* From maxun-core.
|
||||
* @private
|
||||
*/
|
||||
private interpreter: Interpreter | null = null;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Page } from "playwright";
|
||||
import { Action, ActionType, Coordinates, TagName } from "../types";
|
||||
import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import logger from "../logger";
|
||||
import { getBestSelectorForAction } from "./utils";
|
||||
|
||||
@@ -111,7 +111,7 @@ export const getElementInformation = async (
|
||||
console.log(`Element innerText: ${elementInfo.innerText}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return elementInfo;
|
||||
} catch (error) {
|
||||
const { message, stack } = error as Error;
|
||||
|
||||
120
src/api/storage.ts
Normal file
120
src/api/storage.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
import { default as axios } from "axios";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import { RunSettings } from "../components/molecules/RunSettings";
|
||||
import { CreateRunResponse } from "../pages/MainPage";
|
||||
|
||||
/**
 * Requests the list of stored recordings from the server.
 *
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `null` (the failure is printed to the console).
 */
export const getStoredRecordings = async (): Promise<string[] | null> => {
  try {
    const { status, data } = await axios.get('http://localhost:8080/storage/recordings');
    if (status !== 200) {
      throw new Error('Couldn\'t retrieve stored recordings');
    }
    return data;
  } catch(error: any) {
    console.log(error);
    return null;
  }
};
|
||||
|
||||
/**
 * Requests the list of stored runs from the server.
 *
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `null` (the failure is printed to the console).
 */
export const getStoredRuns = async (): Promise<string[] | null> => {
  try {
    const response = await axios.get('http://localhost:8080/storage/runs');
    if (response.status === 200) {
      return response.data;
    } else {
      // This endpoint serves runs, so the error must not mention recordings.
      throw new Error('Couldn\'t retrieve stored runs');
    }
  } catch (error: unknown) {
    console.log(error);
    return null;
  }
};
|
||||
|
||||
/**
 * Asks the server to delete the stored recording with the given name.
 *
 * @param fileName name of the recording, appended to the request URL as-is
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `false` (the failure is printed to the console).
 */
export const deleteRecordingFromStorage = async (fileName: string): Promise<boolean> => {
  try {
    const { status, data } = await axios.delete(`http://localhost:8080/storage/recordings/${fileName}`);
    if (status !== 200) {
      throw new Error(`Couldn't delete stored recording ${fileName}`);
    }
    return data;
  } catch(error: any) {
    console.log(error);
    return false;
  }
};
|
||||
|
||||
/**
 * Asks the server to delete the stored run with the given name.
 *
 * @param fileName name of the run file, appended to the request URL as-is
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `false` (the failure is printed to the console).
 */
export const deleteRunFromStorage = async (fileName: string): Promise<boolean> => {
  try {
    const response = await axios.delete(`http://localhost:8080/storage/runs/${fileName}`);
    if (response.status === 200) {
      return response.data;
    } else {
      // This endpoint deletes runs, so the error must not mention a recording.
      throw new Error(`Couldn't delete stored run ${fileName}`);
    }
  } catch (error: unknown) {
    console.log(error);
    return false;
  }
};
|
||||
|
||||
/**
 * Sends a PUT request to the workflow endpoint for the given browser and
 * stored recording.
 *
 * @param browserId id of the browser, used as the first URL segment
 * @param fileName name of the stored recording, used as the second URL segment
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `null` (the failure is printed to the console).
 */
export const editRecordingFromStorage = async (browserId: string, fileName: string): Promise<WorkflowFile | null> => {
  try {
    const { status, data } = await axios.put(`http://localhost:8080/workflow/${browserId}/${fileName}`);
    if (status !== 200) {
      throw new Error(`Couldn't edit stored recording ${fileName}`);
    }
    return data;
  } catch(error: any) {
    console.log(error);
    return null;
  }
};
|
||||
|
||||
/**
 * Creates a run for a stored recording by PUT-ing the run settings to the server.
 *
 * @param fileName name of the stored recording the run is created for
 * @param settings run settings; a shallow copy is sent as the request body
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `{browserId: '', runId: ''}` (the failure is printed to the console).
 */
export const createRunForStoredRecording = async (fileName: string, settings: RunSettings): Promise<CreateRunResponse> => {
  try {
    const requestBody = {...settings};
    const { status, data } = await axios.put(
      `http://localhost:8080/storage/runs/${fileName}`,
      requestBody);
    if (status !== 200) {
      throw new Error(`Couldn't create a run for a recording ${fileName}`);
    }
    return data;
  } catch(error: any) {
    console.log(error);
    return {browserId: '', runId: ''};
  }
}
|
||||
|
||||
/**
 * Asks the server to interpret a stored recording as part of the given run.
 *
 * @param fileName name of the stored recording
 * @param runId id of the run
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `false` (the failure is printed to the console).
 */
export const interpretStoredRecording = async (fileName: string, runId: string): Promise<boolean> => {
  try {
    const { status, data } = await axios.post(`http://localhost:8080/storage/runs/run/${fileName}/${runId}`);
    if (status !== 200) {
      throw new Error(`Couldn't run a recording ${fileName}`);
    }
    return data;
  } catch(error: any) {
    console.log(error);
    return false;
  }
}
|
||||
|
||||
/**
 * Tells the server that the given run of a stored recording is being aborted.
 *
 * @param fileName name of the stored recording
 * @param runId id of the run
 * @returns the response payload when the server answers with HTTP 200,
 * otherwise `false` (the failure is printed to the console).
 */
export const notifyAboutAbort = async (fileName: string, runId:string): Promise<boolean> => {
  try {
    const { status, data } = await axios.post(`http://localhost:8080/storage/runs/abort/${fileName}/${runId}`);
    if (status !== 200) {
      throw new Error(`Couldn't abort a running recording ${fileName} with id ${runId}`);
    }
    return data;
  } catch(error: any) {
    console.log(error);
    return false;
  }
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import { emptyWorkflow } from "../shared/constants";
|
||||
|
||||
const axios = require('axios').default;
|
||||
|
||||
29
src/components/atoms/ConfirmationBox.tsx
Normal file
29
src/components/atoms/ConfirmationBox.tsx
Normal file
@@ -0,0 +1,29 @@
|
||||
import React from 'react';
|
||||
import { Box, Button, IconButton, Stack, Typography } from "@mui/material";
|
||||
|
||||
/** Props accepted by {@link ConfirmationBox}. */
interface ConfirmationBoxProps {
  /** Selector text shown inside the confirmation question. */
  selector: string;
  /** Invoked when the "Yes" button is clicked. */
  onYes: () => void;
  /** Invoked when the "No" button is clicked. */
  onNo: () => void;
}

/**
 * Centered yes/no prompt asking whether to interact with the element
 * identified by `selector`.
 */
export const ConfirmationBox = (props: ConfirmationBoxProps) => {
  const { selector, onYes, onNo } = props;

  const heading = (
    <Typography variant="h6" component="h2" gutterBottom>
      Confirmation
    </Typography>
  );

  const question = (
    <Typography variant="body1" gutterBottom>
      Do you want to interact with the element: {selector}?
    </Typography>
  );

  return (
    <Box sx={{ textAlign: 'center' }}>
      {heading}
      {question}
      <Box sx={{ mt: 2, display: 'flex', justifyContent: 'center', gap: 2 }}>
        <Button variant="contained" color="primary" onClick={onYes}>
          Yes
        </Button>
        <Button variant="contained" color="secondary" onClick={onNo}>
          No
        </Button>
      </Box>
    </Box>
  );
};
|
||||
@@ -41,4 +41,5 @@ const defaultModalStyle = {
|
||||
display: 'block',
|
||||
overflow: 'scroll',
|
||||
padding: '5px 25px 10px 25px',
|
||||
zIndex: 3147483647,
|
||||
};
|
||||
|
||||
@@ -24,9 +24,9 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei
|
||||
};
|
||||
|
||||
|
||||
console.log('unmodifiedRect:', unmodifiedRect)
|
||||
console.log('rectangle:', rect)
|
||||
console.log('canvas rectangle:', canvasRect)
|
||||
//console.log('unmodifiedRect:', unmodifiedRect)
|
||||
//console.log('rectangle:', rect)
|
||||
//console.log('canvas rectangle:', canvasRect)
|
||||
|
||||
return (
|
||||
<div>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import React, { FC } from 'react';
|
||||
import Typography from '@mui/material/Typography';
|
||||
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import styled from "styled-components";
|
||||
|
||||
interface PairDisplayDivProps {
|
||||
|
||||
@@ -2,6 +2,7 @@ import React, { useCallback, useEffect, useRef } from 'react';
|
||||
import { useSocketStore } from '../../context/socket';
|
||||
import { getMappedCoordinates } from "../../helpers/inputHelpers";
|
||||
import { useGlobalInfoStore } from "../../context/globalInfo";
|
||||
import { useActionContext } from '../../context/browserActions';
|
||||
|
||||
interface CreateRefCallback {
|
||||
(ref: React.RefObject<HTMLCanvasElement>): void;
|
||||
@@ -26,6 +27,8 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
|
||||
const canvasRef = useRef<HTMLCanvasElement>(null);
|
||||
const { socket } = useSocketStore();
|
||||
const { setLastAction, lastAction } = useGlobalInfoStore();
|
||||
const { getText, getScreenshot } = useActionContext();
|
||||
const getTextRef = useRef(getText);
|
||||
|
||||
const notifyLastAction = (action: string) => {
|
||||
if (lastAction !== action) {
|
||||
@@ -34,7 +37,10 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
|
||||
};
|
||||
|
||||
const lastMousePosition = useRef<Coordinates>({ x: 0, y: 0 });
|
||||
//const lastWheelPosition = useRef<ScrollDeltas>({ deltaX: 0, deltaY: 0 });
|
||||
|
||||
useEffect(() => {
|
||||
getTextRef.current = getText;
|
||||
}, [getText]);
|
||||
|
||||
const onMouseEvent = useCallback((event: MouseEvent) => {
|
||||
if (socket) {
|
||||
@@ -45,7 +51,11 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
|
||||
switch (event.type) {
|
||||
case 'mousedown':
|
||||
const clickCoordinates = getMappedCoordinates(event, canvasRef.current, width, height);
|
||||
socket.emit('input:mousedown', clickCoordinates);
|
||||
if (getTextRef.current === true) {
|
||||
console.log('get text')
|
||||
} else {
|
||||
socket.emit('input:mousedown', clickCoordinates);
|
||||
}
|
||||
notifyLastAction('click');
|
||||
break;
|
||||
case 'mousemove':
|
||||
|
||||
@@ -20,16 +20,10 @@ export const ActionSettings = ({ action }: ActionSettingsProps) => {
|
||||
return <Settings.ScreenshotSettings ref={settingsRef} />;
|
||||
case 'scroll':
|
||||
return <Settings.ScrollSettings ref={settingsRef} />;
|
||||
case 'scrape':
|
||||
return <Settings.ScrapeSettings ref={settingsRef} />;
|
||||
case 'scrape':
|
||||
return <Settings.ScrapeSettings ref={settingsRef} />;
|
||||
case 'scrapeSchema':
|
||||
return <Settings.ScrapeSchemaSettings ref={settingsRef} />;
|
||||
case 'script':
|
||||
return <Settings.ScriptSettings ref={settingsRef} />;
|
||||
case 'enqueueLinks':
|
||||
return <Settings.EnqueueLinksSettings ref={settingsRef} />;
|
||||
case 'mouse.click':
|
||||
return <Settings.ClickOnCoordinatesSettings ref={settingsRef} />;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import { GenericModal } from "../atoms/GenericModal";
|
||||
import { modalStyle } from "./AddWhereCondModal";
|
||||
import { Button, MenuItem, TextField, Typography } from "@mui/material";
|
||||
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
} from "@mui/material";
|
||||
import React, { useRef } from "react";
|
||||
import { GenericModal } from "../atoms/GenericModal";
|
||||
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import { SelectChangeEvent } from "@mui/material/Select/Select";
|
||||
import { DisplayConditionSettings } from "./DisplayWhereConditionSettings";
|
||||
import { useSocketStore } from "../../context/socket";
|
||||
|
||||
@@ -5,7 +5,7 @@ import { interpretCurrentRecording, stopCurrentInterpretation } from "../../api/
|
||||
import { useSocketStore } from "../../context/socket";
|
||||
import { useGlobalInfoStore } from "../../context/globalInfo";
|
||||
import { GenericModal } from "../atoms/GenericModal";
|
||||
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import HelpIcon from '@mui/icons-material/Help';
|
||||
|
||||
interface InterpretationButtonsProps {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import React, { useCallback, useEffect, useState } from 'react';
|
||||
import Box from "@mui/material/Box";
|
||||
import { Pair } from "./Pair";
|
||||
import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import { useSocketStore } from "../../context/socket";
|
||||
import { Add } from "@mui/icons-material";
|
||||
import { Socket } from "socket.io-client";
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import React, { FC, useState } from 'react';
|
||||
import { Stack, Button, IconButton, Tooltip, Chip, Badge } from "@mui/material";
|
||||
import { AddPair, deletePair, UpdatePair } from "../../api/workflow";
|
||||
import { WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import { ClearButton } from "../atoms/buttons/ClearButton";
|
||||
import { GenericModal } from "../atoms/GenericModal";
|
||||
import { PairEditForm } from "./PairEditForm";
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import React, { useLayoutEffect, useRef, useState } from 'react';
|
||||
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import { Box, Button, IconButton, MenuItem, Stack, TextField, Tooltip, Typography } from "@mui/material";
|
||||
import { Close, KeyboardArrowDown, KeyboardArrowUp } from "@mui/icons-material";
|
||||
import TreeView from '@mui/lab/TreeView';
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Button, TextField, Typography } from "@mui/material";
|
||||
import React, { FC } from "react";
|
||||
import { Preprocessor, WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { Preprocessor, WhereWhatPair } from "maxun-core";
|
||||
|
||||
interface PairProps {
|
||||
index: string;
|
||||
|
||||
@@ -8,7 +8,7 @@ import TableHead from '@mui/material/TableHead';
|
||||
import TablePagination from '@mui/material/TablePagination';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
import { useEffect } from "react";
|
||||
import { WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import { IconButton } from "@mui/material";
|
||||
import { Assignment, DeleteForever, Edit, PlayCircle } from "@mui/icons-material";
|
||||
import { useGlobalInfoStore } from "../../context/globalInfo";
|
||||
|
||||
@@ -3,7 +3,7 @@ import { InterpretationButtons } from "./InterpretationButtons";
|
||||
import { AddButton } from "../atoms/buttons/AddButton";
|
||||
import { GenericModal } from "../atoms/GenericModal";
|
||||
import { PairEditForm } from "./PairEditForm";
|
||||
import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import { AddPair } from "../../api/workflow";
|
||||
import { Button, Stack } from "@mui/material";
|
||||
import { FastForward } from "@mui/icons-material";
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import React, { forwardRef, useImperativeHandle } from 'react';
|
||||
import { Stack, TextField } from "@mui/material";
|
||||
import { WarningText } from '../../atoms/texts';
|
||||
import InfoIcon from "@mui/icons-material/Info";
|
||||
|
||||
/**
 * Settings form for a click on explicit coordinates.
 *
 * Keeps an `[x, y]` pair in state (initially `[0, 0]`) and exposes it to the
 * parent through the forwarded ref's `getSettings()` method.
 */
export const ClickOnCoordinatesSettings = forwardRef((_props, ref) => {
  const [coordinates, setCoordinates] = React.useState<number[]>([0, 0]);

  useImperativeHandle(ref, () => ({
    getSettings: () => coordinates,
  }));

  // Builds a change handler that replaces one axis and keeps the other one.
  const updateAxis = (axis: 0 | 1) =>
    (event: React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement>) => {
      const typedValue = Number(event.target.value);
      setCoordinates((previous) => (
        axis === 0 ? [typedValue, previous[1]] : [previous[0], typedValue]
      ));
    };

  return (
    <Stack direction="column">
      <TextField
        sx={{ marginLeft: '15px', marginRight: '15px' }}
        type="number"
        label="X"
        onChange={updateAxis(0)}
        required
        defaultValue={coordinates[0]}
      />
      <TextField
        sx={{ margin: '15px' }}
        type="number"
        label="Y"
        onChange={updateAxis(1)}
        required
        defaultValue={coordinates[1]}
      />
      <WarningText>
        <InfoIcon color='warning' />
        The click function will click on the given coordinates.
        You need to put the coordinates by yourself.
      </WarningText>
    </Stack>
  );
});
|
||||
@@ -1,32 +0,0 @@
|
||||
import React, { forwardRef, useImperativeHandle } from 'react';
|
||||
import { Stack, TextField } from "@mui/material";
|
||||
import { WarningText } from "../../atoms/texts";
|
||||
import WarningIcon from "@mui/icons-material/Warning";
|
||||
import InfoIcon from "@mui/icons-material/Info";
|
||||
|
||||
/**
 * Settings form for the enqueue-links action.
 *
 * Keeps the typed selector in state (initially an empty string) and exposes
 * it to the parent through the forwarded ref's `getSettings()` method.
 */
export const EnqueueLinksSettings = forwardRef((_props, ref) => {
  const [selector, setSelector] = React.useState<string>('');

  useImperativeHandle(ref, () => ({
    getSettings: () => selector,
  }));

  return (
    <Stack direction="column">
      <TextField
        sx={{ marginLeft: '15px', marginRight: '15px' }}
        type="string"
        label="Selector"
        required
        onChange={(event) => setSelector(event.target.value)}
      />
      <WarningText>
        <InfoIcon color='warning' />
        Reads elements targeted by the selector and stores their links in a queue.
        Those pages are then processed using the same workflow as the initial page
        (in parallel if the maxConcurrency parameter is greater than 1).
      </WarningText>
    </Stack>
  );
});
|
||||
@@ -2,16 +2,10 @@ import { ScrollSettings } from './scroll';
|
||||
import { ScreenshotSettings } from "./screenshot";
|
||||
import { ScrapeSettings } from "./scrape";
|
||||
import { ScrapeSchemaSettings } from "./scrapeSchema";
|
||||
import { ScriptSettings } from "./script";
|
||||
import { EnqueueLinksSettings } from "./enqueueLinks";
|
||||
import { ClickOnCoordinatesSettings } from "./clickOnCoordinates";
|
||||
|
||||
export {
|
||||
ScrollSettings,
|
||||
ScreenshotSettings,
|
||||
ScrapeSettings,
|
||||
ScrapeSchemaSettings,
|
||||
ScriptSettings,
|
||||
EnqueueLinksSettings,
|
||||
ClickOnCoordinatesSettings,
|
||||
};
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
import React, { forwardRef, useImperativeHandle } from 'react';
|
||||
import Editor from 'react-simple-code-editor';
|
||||
// @ts-ignore
|
||||
import { highlight, languages } from 'prismjs/components/prism-core';
|
||||
import 'prismjs/components/prism-clike';
|
||||
import 'prismjs/components/prism-javascript';
|
||||
import 'prismjs/themes/prism.css';
|
||||
import styled from "styled-components";
|
||||
import InfoIcon from '@mui/icons-material/Info';
|
||||
import { WarningText } from "../../atoms/texts";
|
||||
|
||||
/**
 * Settings form for the script action: a small code editor with JavaScript
 * highlighting.
 *
 * Keeps the typed code in state (initially an empty string) and exposes it
 * to the parent through the forwarded ref's `getSettings()` method.
 */
export const ScriptSettings = forwardRef((_props, ref) => {
  const [code, setCode] = React.useState('');

  useImperativeHandle(ref, () => ({
    getSettings: () => code,
  }));

  // Inline look of the editor surface.
  const editorStyle = {
    fontFamily: '"Fira code", "Fira Mono", monospace',
    fontSize: 12,
    background: '#f0f0f0',
  };

  return (
    <EditorWrapper>
      <WarningText>
        <InfoIcon color='warning' />
        Allows to run an arbitrary asynchronous function evaluated at the server
        side accepting the current page instance argument.
      </WarningText>
      <StyledEditor
        placeholder="Type some code…"
        value={code}
        onValueChange={(nextCode) => setCode(nextCode)}
        highlight={(source) => highlight(source, languages.js)}
        padding={10}
        style={editorStyle}
      />
    </EditorWrapper>
  );
});
|
||||
|
||||
const EditorWrapper = styled.div`
|
||||
flex: 1;
|
||||
overflow: auto;
|
||||
/** hard-coded height */
|
||||
height: 100%;
|
||||
width: 100%;
|
||||
`;
|
||||
|
||||
const StyledEditor = styled(Editor)`
|
||||
white-space: pre;
|
||||
caret-color: #fff;
|
||||
min-width: 100%;
|
||||
min-height: 100%;
|
||||
float: left;
|
||||
& > textarea,
|
||||
& > pre {
|
||||
outline: none;
|
||||
white-space: pre !important;
|
||||
}
|
||||
`;
|
||||
@@ -133,4 +133,4 @@ export const BrowserContent = () => {
|
||||
|
||||
const BrowserContentWrapper = styled.div`
|
||||
grid-area: browser;
|
||||
`;
|
||||
`;
|
||||
@@ -3,17 +3,52 @@ import { useSocketStore } from '../../context/socket';
|
||||
import Canvas from "../atoms/canvas";
|
||||
import { useBrowserDimensionsStore } from "../../context/browserDimensions";
|
||||
import { Highlighter } from "../atoms/Highlighter";
|
||||
import { GenericModal } from '../atoms/GenericModal';
|
||||
import { useActionContext } from '../../context/browserActions';
|
||||
import { useBrowserSteps } from '../../context/browserSteps';
|
||||
|
||||
/**
 * Information about an element, as carried in the `elementInfo` field of the
 * highlighter data.
 */
interface ElementInfo {
  /** Tag name of the element; decides which attribute options are offered. */
  tagName: string;
  hasOnlyText?: boolean;
  innerText?: string;
  url?: string;
  imageUrl?: string;
}

/** A selectable attribute: display label plus the attribute/property name it stands for. */
interface AttributeOption {
  label: string;
  value: string;
}

/**
 * Lists the attributes that can be picked for an element of the given tag.
 *
 * The tag name is compared case-insensitively. Links (`a`) offer text and
 * URL, images (`img`) offer alt text and source URL, and every other tag
 * offers text only. A fresh array is returned on every call.
 */
const getAttributeOptions = (tagName: string): AttributeOption[] => {
  const normalizedTag = tagName.toLowerCase();
  const textOption: AttributeOption = { label: 'Text', value: 'innerText' };

  if (normalizedTag === 'a') {
    return [textOption, { label: 'URL', value: 'href' }];
  }

  if (normalizedTag === 'img') {
    return [
      { label: 'Alt Text', value: 'alt' },
      { label: 'Source URL', value: 'src' },
    ];
  }

  return [textOption];
};
|
||||
|
||||
export const BrowserWindow = () => {
|
||||
|
||||
const [canvasRef, setCanvasReference] = useState<React.RefObject<HTMLCanvasElement> | undefined>(undefined);
|
||||
const [screenShot, setScreenShot] = useState<string>("");
|
||||
const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string } | null>(null);
|
||||
const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string, elementInfo: ElementInfo | null; } | null>(null);
|
||||
const [showAttributeModal, setShowAttributeModal] = useState(false);
|
||||
const [attributeOptions, setAttributeOptions] = useState<AttributeOption[]>([]);
|
||||
const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null);
|
||||
|
||||
const { socket } = useSocketStore();
|
||||
const { width, height } = useBrowserDimensionsStore();
|
||||
|
||||
console.log('Use browser dimensions:', width, height)
|
||||
const { getText } = useActionContext();
|
||||
const { addTextStep } = useBrowserSteps();
|
||||
|
||||
const onMouseMove = (e: MouseEvent) => {
|
||||
if (canvasRef && canvasRef.current && highlighterData) {
|
||||
@@ -46,13 +81,10 @@ export const BrowserWindow = () => {
|
||||
return () => {
|
||||
socket?.off("screencast", screencastHandler);
|
||||
}
|
||||
|
||||
}, [screenShot, canvasRef, socket, screencastHandler]);
|
||||
|
||||
|
||||
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string }) => {
|
||||
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => {
|
||||
setHighlighterData(data);
|
||||
console.log('Highlighter Rect via socket:', data.rect)
|
||||
}, [highlighterData])
|
||||
|
||||
useEffect(() => {
|
||||
@@ -60,16 +92,94 @@ export const BrowserWindow = () => {
|
||||
if (socket) {
|
||||
socket.on("highlighter", highlighterHandler);
|
||||
}
|
||||
//cleaning function
|
||||
return () => {
|
||||
document.removeEventListener('mousemove', onMouseMove);
|
||||
socket?.off("highlighter", highlighterHandler);
|
||||
};
|
||||
}, [socket, onMouseMove]);
|
||||
|
||||
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
||||
if (highlighterData && canvasRef?.current) {
|
||||
const canvasRect = canvasRef.current.getBoundingClientRect();
|
||||
const clickX = e.clientX - canvasRect.left;
|
||||
const clickY = e.clientY - canvasRect.top;
|
||||
|
||||
const highlightRect = highlighterData.rect;
|
||||
if (
|
||||
clickX >= highlightRect.left &&
|
||||
clickX <= highlightRect.right &&
|
||||
clickY >= highlightRect.top &&
|
||||
clickY <= highlightRect.bottom
|
||||
) {
|
||||
if (getText === true) {
|
||||
const options = getAttributeOptions(highlighterData.elementInfo?.tagName || '');
|
||||
if (options.length > 1) {
|
||||
setAttributeOptions(options);
|
||||
setSelectedElement({
|
||||
selector: highlighterData.selector,
|
||||
info: highlighterData.elementInfo
|
||||
});
|
||||
setShowAttributeModal(true);
|
||||
} else {
|
||||
addTextStep('', highlighterData.elementInfo?.innerText || '', {
|
||||
selector: highlighterData.selector,
|
||||
tag: highlighterData.elementInfo?.tagName,
|
||||
attribute: 'innerText'
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const handleAttributeSelection = (attribute: string) => {
|
||||
if (selectedElement) {
|
||||
let data = '';
|
||||
switch (attribute) {
|
||||
case 'href':
|
||||
data = selectedElement.info?.url || '';
|
||||
break;
|
||||
case 'src':
|
||||
data = selectedElement.info?.imageUrl || '';
|
||||
break;
|
||||
default:
|
||||
data = selectedElement.info?.innerText || '';
|
||||
}
|
||||
{
|
||||
if (getText === true) {
|
||||
addTextStep('', data, {
|
||||
selector: selectedElement.selector,
|
||||
tag: selectedElement.info?.tagName,
|
||||
attribute: attribute
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
setShowAttributeModal(false);
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
{(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ?
|
||||
<div onClick={handleClick}>
|
||||
{
|
||||
getText === true ? (
|
||||
<GenericModal
|
||||
isOpen={showAttributeModal}
|
||||
onClose={() => { }}
|
||||
canBeClosed={false}
|
||||
>
|
||||
<div>
|
||||
<h2>Select Attribute</h2>
|
||||
{attributeOptions.map((option) => (
|
||||
<button key={option.value} onClick={() => handleAttributeSelection(option.value)}>
|
||||
{option.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
</GenericModal>
|
||||
) : null
|
||||
}
|
||||
{(getText === true && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ?
|
||||
<Highlighter
|
||||
unmodifiedRect={highlighterData?.rect}
|
||||
displayedSelector={highlighterData?.selector}
|
||||
@@ -83,7 +193,7 @@ export const BrowserWindow = () => {
|
||||
width={width}
|
||||
height={height}
|
||||
/>
|
||||
</>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -97,8 +207,6 @@ const drawImage = (image: string, canvas: HTMLCanvasElement): void => {
|
||||
img.onload = () => {
|
||||
URL.revokeObjectURL(img.src);
|
||||
ctx?.drawImage(img, 0, 0, 1280, 720);
|
||||
console.log('Image drawn on canvas:', img.width, img.height);
|
||||
console.log('Image drawn on canvas:', canvas.width, canvas.height);
|
||||
};
|
||||
|
||||
};
|
||||
@@ -2,7 +2,7 @@ import { Box, Paper, Tab, Tabs } from "@mui/material";
|
||||
import React, { useCallback, useEffect, useState } from "react";
|
||||
import { getActiveWorkflow, getParamsOfActiveWorkflow } from "../../api/workflow";
|
||||
import { useSocketStore } from '../../context/socket';
|
||||
import { WhereWhatPair, WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import { SidePanelHeader } from "../molecules/SidePanelHeader";
|
||||
import { emptyWorkflow } from "../../shared/constants";
|
||||
import { LeftSidePanelContent } from "../molecules/LeftSidePanelContent";
|
||||
|
||||
@@ -1,96 +1,188 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import { Button, MenuItem, Paper, Stack, Tabs, Tab } from "@mui/material";
|
||||
import { Dropdown as MuiDropdown } from '../atoms/DropdownMui';
|
||||
import React, { useState, useCallback } from 'react';
|
||||
import { Button, Paper, Box, TextField } from "@mui/material";
|
||||
import EditIcon from '@mui/icons-material/Edit';
|
||||
import TextFieldsIcon from '@mui/icons-material/TextFields';
|
||||
import DocumentScannerIcon from '@mui/icons-material/DocumentScanner';
|
||||
import styled from "styled-components";
|
||||
import { ActionSettings } from "../molecules/ActionSettings";
|
||||
import { SelectChangeEvent } from "@mui/material/Select/Select";
|
||||
import { SimpleBox } from "../atoms/Box";
|
||||
import Typography from "@mui/material/Typography";
|
||||
import { useGlobalInfoStore } from "../../context/globalInfo";
|
||||
import { PairForEdit } from "../../pages/RecordingPage";
|
||||
import { useActionContext } from '../../context/browserActions';
|
||||
import { useBrowserSteps } from '../../context/browserSteps';
|
||||
import { useSocketStore } from '../../context/socket';
|
||||
import { ScreenshotSettings } from '../../shared/types';
|
||||
import InputAdornment from '@mui/material/InputAdornment';
|
||||
|
||||
interface RightSidePanelProps {
|
||||
pairForEdit: PairForEdit;
|
||||
}
|
||||
|
||||
export const RightSidePanel = ({pairForEdit}: RightSidePanelProps) => {
|
||||
export const RightSidePanel = () => {
|
||||
const [textLabels, setTextLabels] = useState<{ [id: number]: string }>({});
|
||||
const [errors, setErrors] = useState<{ [id: number]: string }>({});
|
||||
const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: number]: boolean }>({});
|
||||
|
||||
const [content, setContent] = useState<string>('action');
|
||||
const [action, setAction] = React.useState<string>('');
|
||||
const [isSettingsDisplayed, setIsSettingsDisplayed] = React.useState<boolean>(false);
|
||||
const { lastAction, notify } = useGlobalInfoStore();
|
||||
const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot } = useActionContext();
|
||||
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep } = useBrowserSteps();
|
||||
const { socket } = useSocketStore();
|
||||
|
||||
const { lastAction } = useGlobalInfoStore();
|
||||
|
||||
const handleChange = (event: React.SyntheticEvent, newValue: string) => {
|
||||
setContent(newValue);
|
||||
};
|
||||
|
||||
const handleActionSelect = (event: SelectChangeEvent) => {
|
||||
const { value } = event.target;
|
||||
setAction(value);
|
||||
setIsSettingsDisplayed(true);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (content !== 'detail' && pairForEdit.pair !== null) {
|
||||
setContent('detail');
|
||||
const handleTextLabelChange = (id: number, label: string) => {
|
||||
setTextLabels(prevLabels => ({ ...prevLabels, [id]: label }));
|
||||
if (!label.trim()) {
|
||||
setErrors(prevErrors => ({ ...prevErrors, [id]: 'Label cannot be empty' }));
|
||||
} else {
|
||||
setErrors(prevErrors => ({ ...prevErrors, [id]: '' }));
|
||||
}
|
||||
}, [pairForEdit])
|
||||
};
|
||||
|
||||
const handleTextStepConfirm = (id: number) => {
|
||||
const label = textLabels[id]?.trim();
|
||||
if (label) {
|
||||
updateBrowserTextStepLabel(id, label);
|
||||
setConfirmedTextSteps(prev => ({ ...prev, [id]: true }));
|
||||
} else {
|
||||
setErrors(prevErrors => ({ ...prevErrors, [id]: 'Label cannot be empty' }));
|
||||
}
|
||||
};
|
||||
|
||||
const handleTextStepDiscard = (id: number) => {
|
||||
deleteBrowserStep(id);
|
||||
setTextLabels(prevLabels => {
|
||||
const { [id]: _, ...rest } = prevLabels;
|
||||
return rest;
|
||||
});
|
||||
setErrors(prevErrors => {
|
||||
const { [id]: _, ...rest } = prevErrors;
|
||||
return rest;
|
||||
});
|
||||
};
|
||||
|
||||
const getTextSettingsObject = useCallback(() => {
|
||||
const settings: Record<string, { selector: string; tag?: string;[key: string]: any }> = {};
|
||||
browserSteps.forEach(step => {
|
||||
if (step.type === 'text' && step.label && step.selectorObj?.selector) {
|
||||
settings[step.label] = step.selectorObj;
|
||||
}
|
||||
});
|
||||
return settings;
|
||||
}, [browserSteps]);
|
||||
|
||||
|
||||
const stopCaptureAndEmitGetTextSettings = useCallback(() => {
|
||||
const hasUnconfirmedTextSteps = browserSteps.some(step => step.type === 'text' && !confirmedTextSteps[step.id]);
|
||||
if (hasUnconfirmedTextSteps) {
|
||||
notify('error', 'Please confirm no labels are empty');
|
||||
return;
|
||||
}
|
||||
stopGetText();
|
||||
const settings = getTextSettingsObject();
|
||||
const hasTextSteps = browserSteps.some(step => step.type === 'text');
|
||||
if (hasTextSteps) {
|
||||
socket?.emit('action', { action: 'scrapeSchema', settings });
|
||||
}
|
||||
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps]);
|
||||
|
||||
const captureScreenshot = (fullPage: boolean) => {
|
||||
const screenshotSettings: ScreenshotSettings = {
|
||||
fullPage,
|
||||
type: 'png',
|
||||
timeout: 30000,
|
||||
animations: 'allow',
|
||||
caret: 'hide',
|
||||
scale: 'device',
|
||||
};
|
||||
socket?.emit('action', { action: 'screenshot', settings: screenshotSettings });
|
||||
addScreenshotStep(fullPage);
|
||||
stopGetScreenshot();
|
||||
};
|
||||
|
||||
return (
|
||||
<Paper
|
||||
variant="outlined"
|
||||
sx={{
|
||||
height: '100%',
|
||||
width: '100%',
|
||||
backgroundColor: 'white',
|
||||
alignItems: "center",
|
||||
}}>
|
||||
<Paper variant="outlined" sx={{ height: '100%', width: '100%', backgroundColor: 'white', alignItems: "center" }}>
|
||||
<SimpleBox height={60} width='100%' background='lightGray' radius='0%'>
|
||||
<Typography sx={{ padding: '10px' }}>
|
||||
Last action:
|
||||
{` ${lastAction}`}
|
||||
</Typography>
|
||||
<Typography sx={{ padding: '10px' }}>Last action: {` ${lastAction}`}</Typography>
|
||||
</SimpleBox>
|
||||
|
||||
{content === 'action' ? (
|
||||
<React.Fragment>
|
||||
<ActionDescription>Type of action:</ActionDescription>
|
||||
<ActionTypeWrapper>
|
||||
<MuiDropdown
|
||||
id="action"
|
||||
label="Action"
|
||||
value={action}
|
||||
handleSelect={handleActionSelect}>
|
||||
<MenuItem value="mouse.click">click on coordinates</MenuItem>
|
||||
<MenuItem value="enqueueLinks">enqueueLinks</MenuItem>
|
||||
<MenuItem value="scrape">scrape</MenuItem>
|
||||
<MenuItem value="scrapeSchema">scrapeSchema</MenuItem>
|
||||
<MenuItem value="screenshot">screenshot</MenuItem>
|
||||
<MenuItem value="script">script</MenuItem>
|
||||
<MenuItem value="scroll">scroll</MenuItem>
|
||||
</MuiDropdown>
|
||||
</ActionTypeWrapper>
|
||||
<Box display="flex" flexDirection="column" gap={2} style={{ margin: '15px' }}>
|
||||
{!getText && !getScreenshot && <Button variant="contained" onClick={startGetText}>Capture Text</Button>}
|
||||
{getText &&
|
||||
<>
|
||||
<Box display="flex" justifyContent="space-between" gap={2} style={{ margin: '15px' }}>
|
||||
<Button variant="outlined" onClick={stopCaptureAndEmitGetTextSettings}>Confirm</Button>
|
||||
<Button variant="outlined" color="error" onClick={stopGetText}>Discard</Button>
|
||||
</Box>
|
||||
</>
|
||||
}
|
||||
|
||||
{isSettingsDisplayed &&
|
||||
<ActionSettings action={action}/>
|
||||
{!getText && !getScreenshot && <Button variant="contained" onClick={startGetScreenshot}>Capture Screenshot</Button>}
|
||||
{getScreenshot && (
|
||||
<Box display="flex" flexDirection="column" gap={2}>
|
||||
<Button variant="contained" onClick={() => captureScreenshot(true)}>Capture Fullpage</Button>
|
||||
<Button variant="contained" onClick={() => captureScreenshot(false)}>Capture Visible Part</Button>
|
||||
<Button variant="outlined" color="error" onClick={stopGetScreenshot}>Discard</Button>
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
<Box>
|
||||
{browserSteps.map(step => (
|
||||
<Box key={step.id} sx={{ boxShadow: 5, padding: '10px', margin: '10px', borderRadius: '4px' }}>
|
||||
{
|
||||
step.type === 'text' ? (
|
||||
<>
|
||||
<TextField
|
||||
label="Label"
|
||||
value={textLabels[step.id] || step.label || ''}
|
||||
onChange={(e) => handleTextLabelChange(step.id, e.target.value)}
|
||||
fullWidth
|
||||
margin="normal"
|
||||
error={!!errors[step.id]}
|
||||
helperText={errors[step.id]}
|
||||
InputProps={{
|
||||
readOnly: confirmedTextSteps[step.id],
|
||||
startAdornment: (
|
||||
<InputAdornment position="start">
|
||||
<EditIcon />
|
||||
</InputAdornment>
|
||||
)
|
||||
}}
|
||||
/>
|
||||
<TextField
|
||||
label="Data"
|
||||
value={step.data}
|
||||
fullWidth
|
||||
margin="normal"
|
||||
InputProps={{
|
||||
readOnly: confirmedTextSteps[step.id],
|
||||
startAdornment: (
|
||||
<InputAdornment position="start">
|
||||
<TextFieldsIcon />
|
||||
</InputAdornment>
|
||||
)
|
||||
}}
|
||||
/>
|
||||
{!confirmedTextSteps[step.id] && (
|
||||
<Box display="flex" justifyContent="space-between" gap={2}>
|
||||
<Button variant="contained" onClick={() => handleTextStepConfirm(step.id)} disabled={!textLabels[step.id]?.trim()}>Confirm</Button>
|
||||
<Button variant="contained" onClick={() => handleTextStepDiscard(step.id)}>Discard</Button>
|
||||
</Box>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
step.type === 'screenshot' && (
|
||||
<Box display="flex" alignItems="center">
|
||||
<DocumentScannerIcon sx={{ mr: 1 }} />
|
||||
<Typography>
|
||||
{`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`}
|
||||
</Typography>
|
||||
</Box>
|
||||
)
|
||||
)
|
||||
}
|
||||
</React.Fragment>
|
||||
)
|
||||
: null
|
||||
}
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
</Paper>
|
||||
);
|
||||
};
|
||||
|
||||
const ActionTypeWrapper = styled.div`
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
margin-top: 20px;
|
||||
`;
|
||||
|
||||
export const ActionDescription = styled.p`
|
||||
margin-left: 15px;
|
||||
`;
|
||||
|
||||
37
src/context/browserActions.tsx
Normal file
37
src/context/browserActions.tsx
Normal file
@@ -0,0 +1,37 @@
|
||||
import React, { createContext, useContext, useState, ReactNode } from 'react';
|
||||
|
||||
interface ActionContextProps {
|
||||
getText: boolean;
|
||||
getScreenshot: boolean;
|
||||
startGetText: () => void;
|
||||
stopGetText: () => void;
|
||||
startGetScreenshot: () => void;
|
||||
stopGetScreenshot: () => void;
|
||||
}
|
||||
|
||||
const ActionContext = createContext<ActionContextProps | undefined>(undefined);
|
||||
|
||||
/**
 * Context provider for the capture modes.
 *
 * Holds two independent boolean flags, `getText` and `getScreenshot`, both
 * initially `false`, and exposes them together with start/stop callbacks that
 * set the corresponding flag to `true` / `false`.
 */
export const ActionProvider = ({ children }: { children: ReactNode }) => {
  const [getText, setGetText] = useState<boolean>(false);
  const [getScreenshot, setGetScreenshot] = useState<boolean>(false);

  // Assembled on every render, so consumers always see the current flags.
  const contextValue = {
    getText,
    getScreenshot,
    startGetText: () => {
      setGetText(true);
    },
    stopGetText: () => {
      setGetText(false);
    },
    startGetScreenshot: () => {
      setGetScreenshot(true);
    },
    stopGetScreenshot: () => {
      setGetScreenshot(false);
    },
  };

  return <ActionContext.Provider value={contextValue}>{children}</ActionContext.Provider>;
};
|
||||
|
||||
/**
 * Reads the value supplied by the nearest `ActionProvider`.
 *
 * @returns The context value when one has been provided.
 * @throws Error when the context value is `undefined`, i.e. the hook is used
 *         outside an `ActionProvider`.
 */
export const useActionContext = () => {
  const actionContext = useContext(ActionContext);

  if (actionContext !== undefined) {
    return actionContext;
  }

  throw new Error('useActionContext must be used within an ActionProvider');
};
|
||||
85
src/context/browserSteps.tsx
Normal file
85
src/context/browserSteps.tsx
Normal file
@@ -0,0 +1,85 @@
|
||||
import React, { createContext, useContext, useState } from 'react';
|
||||
|
||||
interface TextStep {
|
||||
id: number;
|
||||
type: 'text';
|
||||
label: string;
|
||||
data: string;
|
||||
selectorObj: SelectorObject;
|
||||
}
|
||||
|
||||
interface ScreenshotStep {
|
||||
id: number;
|
||||
type: 'screenshot';
|
||||
fullPage: boolean;
|
||||
}
|
||||
|
||||
|
||||
type BrowserStep = TextStep | ScreenshotStep;
|
||||
|
||||
interface SelectorObject {
|
||||
selector: string;
|
||||
tag?: string;
|
||||
attribute?: string;
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
interface BrowserStepsContextType {
|
||||
browserSteps: BrowserStep[];
|
||||
addTextStep: (label: string, data: string, selectorObj: SelectorObject) => void;
|
||||
addScreenshotStep: (fullPage: boolean) => void;
|
||||
deleteBrowserStep: (id: number) => void;
|
||||
updateBrowserTextStepLabel: (id: number, newLabel: string) => void;
|
||||
}
|
||||
|
||||
const BrowserStepsContext = createContext<BrowserStepsContextType | undefined>(undefined);
|
||||
|
||||
/**
 * Context provider that owns the ordered list of recorded browser steps
 * (text captures and screenshots) and the operations that modify it.
 *
 * Step ids are based on the creation timestamp but are guaranteed to be
 * unique within the list: consumers use `step.id` as a React key and as the
 * handle for delete/update, so two steps created in the same millisecond must
 * not share an id.
 */
export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => {
  const [browserSteps, setBrowserSteps] = useState<BrowserStep[]>([]);

  /**
   * Returns `now`, bumped past every id already present in `steps`, so the
   * result is never equal to an existing id.
   */
  const nextStepId = (steps: BrowserStep[], now: number): number =>
    steps.reduce((candidate, step) => Math.max(candidate, step.id + 1), now);

  /** Appends a text-capture step with the given label, captured data and selector description. */
  const addTextStep = (label: string, data: string, selectorObj: SelectorObject) => {
    // Read the clock outside the updater so the updater stays a pure function of prevSteps.
    const now = Date.now();
    setBrowserSteps(prevSteps => [
      ...prevSteps,
      { id: nextStepId(prevSteps, now), type: 'text', label, data, selectorObj }
    ]);
  };

  /** Appends a screenshot step; `fullPage` records which kind of screenshot was requested. */
  const addScreenshotStep = (fullPage: boolean) => {
    const now = Date.now();
    setBrowserSteps(prevSteps => [
      ...prevSteps,
      { id: nextStepId(prevSteps, now), type: 'screenshot', fullPage }
    ]);
  };

  /** Removes the step with the given id; does nothing when no step matches. */
  const deleteBrowserStep = (id: number) => {
    setBrowserSteps(prevSteps => prevSteps.filter(step => step.id !== id));
  };

  /** Replaces the label of the text step with the given id; other steps are left untouched. */
  const updateBrowserTextStepLabel = (id: number, newLabel: string) => {
    setBrowserSteps(prevSteps =>
      prevSteps.map(step =>
        // Only text steps carry a label; never graft one onto a screenshot step.
        step.type === 'text' && step.id === id ? { ...step, label: newLabel } : step
      )
    );
  };

  return (
    <BrowserStepsContext.Provider value={{
      browserSteps,
      addTextStep,
      addScreenshotStep,
      deleteBrowserStep,
      updateBrowserTextStepLabel,
    }}>
      {children}
    </BrowserStepsContext.Provider>
  );
};
|
||||
|
||||
/**
 * Reads the value supplied by the nearest `BrowserStepsProvider`.
 *
 * @returns The context value when one is available.
 * @throws Error when the context value is falsy, i.e. the hook is used outside
 *         a `BrowserStepsProvider`.
 */
export const useBrowserSteps = () => {
  const stepsContext = useContext(BrowserStepsContext);

  if (stepsContext) {
    return stepsContext;
  }

  throw new Error('useBrowserSteps must be used within a BrowserStepsProvider');
};
|
||||
@@ -5,6 +5,12 @@ body {
|
||||
sans-serif;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
scrollbar-gutter: stable;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
code {
|
||||
|
||||
@@ -15,7 +15,7 @@ export const PageWrapper = () => {
|
||||
const [recordingName, setRecordingName] = useState('');
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
const { browserId, setBrowserId, notification } = useGlobalInfoStore();
|
||||
const { browserId, setBrowserId, notification } = useGlobalInfoStore();
|
||||
|
||||
const handleNewRecording = () => {
|
||||
setBrowserId('new-recording');
|
||||
@@ -27,15 +27,15 @@ export const PageWrapper = () => {
|
||||
setBrowserId('new-recording');
|
||||
}
|
||||
|
||||
const isNotification = (): boolean=> {
|
||||
if (notification.isOpen && !open){
|
||||
const isNotification = (): boolean => {
|
||||
if (notification.isOpen && !open) {
|
||||
setOpen(true);
|
||||
}
|
||||
return notification.isOpen;
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
const isRecordingInProgress = async() => {
|
||||
const isRecordingInProgress = async () => {
|
||||
const id = await getActiveBrowserId();
|
||||
if (id) {
|
||||
setBrowserId(id);
|
||||
@@ -48,26 +48,26 @@ export const PageWrapper = () => {
|
||||
<div>
|
||||
<SocketProvider>
|
||||
<React.Fragment>
|
||||
<NavBar newRecording={handleNewRecording} recordingName={recordingName} isRecording={!!browserId}/>
|
||||
{browserId
|
||||
? (
|
||||
<BrowserDimensionsProvider>
|
||||
<React.Fragment>
|
||||
<RecordingPage recordingName={recordingName}/>
|
||||
<InterpretationLog/>
|
||||
</React.Fragment>
|
||||
</BrowserDimensionsProvider>
|
||||
)
|
||||
: <MainPage
|
||||
handleEditRecording={handleEditRecording}
|
||||
/>
|
||||
}
|
||||
<NavBar newRecording={handleNewRecording} recordingName={recordingName} isRecording={!!browserId} />
|
||||
{browserId
|
||||
? (
|
||||
<BrowserDimensionsProvider>
|
||||
<React.Fragment>
|
||||
<RecordingPage recordingName={recordingName} />
|
||||
<InterpretationLog />
|
||||
</React.Fragment>
|
||||
</BrowserDimensionsProvider>
|
||||
)
|
||||
: <MainPage
|
||||
handleEditRecording={handleEditRecording}
|
||||
/>
|
||||
}
|
||||
</React.Fragment>
|
||||
</SocketProvider>
|
||||
{ isNotification() ?
|
||||
{isNotification() ?
|
||||
<AlertSnackbar severity={notification.severity}
|
||||
message={notification.message}
|
||||
isOpen={notification.isOpen}/>
|
||||
message={notification.message}
|
||||
isOpen={notification.isOpen} />
|
||||
: null
|
||||
}
|
||||
</div>
|
||||
|
||||
@@ -7,9 +7,11 @@ import { RightSidePanel } from "../components/organisms/RightSidePanel";
|
||||
import { Loader } from "../components/atoms/Loader";
|
||||
import { useSocketStore } from "../context/socket";
|
||||
import { useBrowserDimensionsStore } from "../context/browserDimensions";
|
||||
import { ActionProvider } from "../context/browserActions"
|
||||
import { BrowserStepsProvider } from '../context/browserSteps';
|
||||
import { useGlobalInfoStore } from "../context/globalInfo";
|
||||
import { editRecordingFromStorage } from "../api/storage";
|
||||
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import styled from "styled-components";
|
||||
|
||||
interface RecordingPageProps {
|
||||
@@ -104,26 +106,30 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => {
|
||||
}, [socket, handleLoaded]);
|
||||
|
||||
return (
|
||||
<div>
|
||||
{isLoaded ?
|
||||
<Grid container direction="row" spacing={0}>
|
||||
<Grid item xs={2} ref={workflowListRef} style={{ display: "flex", flexDirection: "row" }}>
|
||||
<LeftSidePanel
|
||||
sidePanelRef={workflowListRef.current}
|
||||
alreadyHasScrollbar={hasScrollbar}
|
||||
recordingName={recordingName ? recordingName : ''}
|
||||
handleSelectPairForEdit={handleSelectPairForEdit}
|
||||
/>
|
||||
</Grid>
|
||||
<Grid id="browser-content" ref={browserContentRef} item xs>
|
||||
<BrowserContent />
|
||||
</Grid>
|
||||
<Grid item xs={2}>
|
||||
<RightSidePanel pairForEdit={pairForEdit} />
|
||||
</Grid>
|
||||
</Grid>
|
||||
: <Loader />}
|
||||
</div>
|
||||
<ActionProvider>
|
||||
<BrowserStepsProvider>
|
||||
<div>
|
||||
{isLoaded ?
|
||||
<Grid container direction="row" spacing={0}>
|
||||
<Grid item xs={2} ref={workflowListRef} style={{ display: "flex", flexDirection: "row" }}>
|
||||
<LeftSidePanel
|
||||
sidePanelRef={workflowListRef.current}
|
||||
alreadyHasScrollbar={hasScrollbar}
|
||||
recordingName={recordingName ? recordingName : ''}
|
||||
handleSelectPairForEdit={handleSelectPairForEdit}
|
||||
/>
|
||||
</Grid>
|
||||
<Grid id="browser-content" ref={browserContentRef} item xs>
|
||||
<BrowserContent />
|
||||
</Grid>
|
||||
<Grid item xs={2}>
|
||||
<RightSidePanel />
|
||||
</Grid>
|
||||
</Grid>
|
||||
: <Loader />}
|
||||
</div>
|
||||
</BrowserStepsProvider>
|
||||
</ActionProvider>
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
import { WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
|
||||
export const emptyWorkflow: WorkflowFile = { workflow: [] };
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { WorkflowFile } from "@wbr-project/wbr-interpret";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import { Locator } from "playwright";
|
||||
|
||||
export type Workflow = WorkflowFile["workflow"];
|
||||
@@ -15,7 +15,7 @@ export interface ScreenshotSettings {
|
||||
fullPage?: boolean;
|
||||
mask?: Locator[];
|
||||
omitBackground?: boolean;
|
||||
// is this still needed? - @wbr-project/wbr-interpret outputs to a binary output
|
||||
// is this still needed? - maxun-core outputs to a binary output
|
||||
path?: string;
|
||||
quality?: number;
|
||||
scale?: "css" | "device";
|
||||
|
||||
Reference in New Issue
Block a user