Merge pull request #675 from getmaxun/smart-list
feat: better, faster, smarter capture list
This commit is contained in:
@@ -423,44 +423,149 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
* @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one sub-array per list
|
* @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one sub-array per list
|
||||||
*/
|
*/
|
||||||
window.scrapeList = async function ({ listSelector, fields, limit = 10 }) {
|
window.scrapeList = async function ({ listSelector, fields, limit = 10 }) {
|
||||||
// Enhanced query function to handle iframe, frame and shadow DOM
|
// XPath evaluation functions
|
||||||
const queryElement = (rootElement, selector) => {
|
/**
 * Evaluate an XPath expression relative to a context node and return the
 * first matching node.
 *
 * @param {Node} rootElement - Context node. A document evaluates the
 *   expression itself; any other node delegates to its ownerDocument.
 * @param {string} xpath - XPath expression to evaluate.
 * @returns {Node|null} The first node in document order, or null when the
 *   context has no owner document, nothing matches, or evaluation throws.
 */
const evaluateXPath = (rootElement, xpath) => {
  try {
    let evaluator;
    if (rootElement.nodeType === Node.DOCUMENT_NODE) {
      evaluator = rootElement;
    } else {
      evaluator = rootElement.ownerDocument;
    }

    if (!evaluator) {
      return null;
    }

    const { singleNodeValue } = evaluator.evaluate(
      xpath,
      rootElement,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    return singleNodeValue;
  } catch (error) {
    // Any failure (including a missing context node) is logged and
    // reported to the caller as "no match".
    console.warn("XPath evaluation failed:", xpath, error);
    return null;
  }
};
|
||||||
|
|
||||||
|
/**
 * Evaluate an XPath expression relative to a context node and collect every
 * matching element.
 *
 * @param {Node} rootElement - Context node. A document evaluates the
 *   expression itself; any other node delegates to its ownerDocument.
 * @param {string} xpath - XPath expression to evaluate.
 * @returns {Array<Element>} Matching element nodes in snapshot order.
 *   Non-element matches are dropped. The array is empty when the context
 *   has no owner document or evaluation throws.
 */
const evaluateXPathAll = (rootElement, xpath) => {
  try {
    let evaluator;
    if (rootElement.nodeType === Node.DOCUMENT_NODE) {
      evaluator = rootElement;
    } else {
      evaluator = rootElement.ownerDocument;
    }

    if (!evaluator) {
      return [];
    }

    const snapshot = evaluator.evaluate(
      xpath,
      rootElement,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );

    const matches = [];
    let position = 0;
    while (position < snapshot.snapshotLength) {
      const candidate = snapshot.snapshotItem(position);
      position += 1;
      // Keep element nodes only; text, attribute and null entries are skipped.
      if (!candidate || candidate.nodeType !== Node.ELEMENT_NODE) {
        continue;
      }
      matches.push(candidate);
    }
    return matches;
  } catch (error) {
    console.warn("XPath evaluation failed:", xpath, error);
    return [];
  }
};
|
||||||
|
|
||||||
|
/**
 * Decide whether a selector string should be treated as XPath rather than CSS.
 *
 * A selector counts as XPath when, after leading whitespace is ignored, it
 * starts with:
 *   - "/"   (absolute path; this also covers "//")
 *   - "./"  (path relative to the context node, including ".//")
 *   - "../" (path relative to the parent of the context node)
 *   - "("   (parenthesised expression such as "(//li)[2]")
 * None of these prefixes can begin a valid CSS selector, so CSS input is
 * never misclassified.
 *
 * @param {string} selector - Selector text to classify.
 * @returns {boolean} True when the selector looks like an XPath expression.
 */
const isXPathSelector = (selector) => {
  const expression = selector.trimStart();
  return (
    expression.startsWith("/") ||
    expression.startsWith("./") ||
    expression.startsWith("../") ||
    expression.startsWith("(")
  );
};
|
||||||
|
|
||||||
|
// Enhanced query function to handle iframe, frame, shadow DOM, CSS selectors, and XPath
|
||||||
|
const queryElement = (rootElement, selector) => {
|
||||||
|
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
||||||
|
// Check if it's an XPath selector
|
||||||
|
if (isXPathSelector(selector)) {
|
||||||
|
return evaluateXPath(rootElement, selector);
|
||||||
|
} else {
|
||||||
|
return rootElement.querySelector(selector);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
||||||
let currentElement = rootElement;
|
let currentElement = rootElement;
|
||||||
|
|
||||||
for (let i = 0; i < parts.length; i++) {
|
for (let i = 0; i < parts.length; i++) {
|
||||||
if (!currentElement) return null;
|
if (!currentElement) return null;
|
||||||
|
|
||||||
// Handle iframe and frame traversal
|
// Handle iframe and frame traversal
|
||||||
if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') {
|
if (
|
||||||
|
currentElement.tagName === "IFRAME" ||
|
||||||
|
currentElement.tagName === "FRAME"
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
const frameDoc = currentElement.contentDocument || currentElement.contentWindow.document;
|
const frameDoc =
|
||||||
|
currentElement.contentDocument ||
|
||||||
|
currentElement.contentWindow.document;
|
||||||
|
if (!frameDoc) return null;
|
||||||
|
|
||||||
|
if (isXPathSelector(parts[i])) {
|
||||||
|
currentElement = evaluateXPath(frameDoc, parts[i]);
|
||||||
|
} else {
|
||||||
currentElement = frameDoc.querySelector(parts[i]);
|
currentElement = frameDoc.querySelector(parts[i]);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn(`Cannot access ${currentElement.tagName.toLowerCase()} content:`, e);
|
console.warn(
|
||||||
|
`Cannot access ${currentElement.tagName.toLowerCase()} content:`,
|
||||||
|
e
|
||||||
|
);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let nextElement = null;
|
||||||
|
|
||||||
// Try regular DOM first
|
// Try regular DOM first
|
||||||
let nextElement = currentElement.querySelector(parts[i]);
|
if ("querySelector" in currentElement) {
|
||||||
|
if (isXPathSelector(parts[i])) {
|
||||||
|
nextElement = evaluateXPath(currentElement, parts[i]);
|
||||||
|
} else {
|
||||||
|
nextElement = currentElement.querySelector(parts[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Try shadow DOM if not found
|
// Try shadow DOM if not found
|
||||||
if (!nextElement && currentElement.shadowRoot) {
|
if (
|
||||||
|
!nextElement &&
|
||||||
|
"shadowRoot" in currentElement &&
|
||||||
|
currentElement.shadowRoot
|
||||||
|
) {
|
||||||
|
if (isXPathSelector(parts[i])) {
|
||||||
|
nextElement = evaluateXPath(currentElement.shadowRoot, parts[i]);
|
||||||
|
} else {
|
||||||
nextElement = currentElement.shadowRoot.querySelector(parts[i]);
|
nextElement = currentElement.shadowRoot.querySelector(parts[i]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check children's shadow roots if still not found
|
// Check children's shadow roots if still not found
|
||||||
if (!nextElement) {
|
if (!nextElement && "children" in currentElement) {
|
||||||
const children = Array.from(currentElement.children || []);
|
const children = Array.from(currentElement.children || []);
|
||||||
for (const child of children) {
|
for (const child of children) {
|
||||||
if (child.shadowRoot) {
|
if (child.shadowRoot) {
|
||||||
|
if (isXPathSelector(parts[i])) {
|
||||||
|
nextElement = evaluateXPath(child.shadowRoot, parts[i]);
|
||||||
|
} else {
|
||||||
nextElement = child.shadowRoot.querySelector(parts[i]);
|
nextElement = child.shadowRoot.querySelector(parts[i]);
|
||||||
|
}
|
||||||
if (nextElement) break;
|
if (nextElement) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -474,11 +579,15 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
|
|
||||||
// Enhanced query all function for both contexts
|
// Enhanced query all function for both contexts
|
||||||
const queryElementAll = (rootElement, selector) => {
|
const queryElementAll = (rootElement, selector) => {
|
||||||
if (!selector.includes('>>') && !selector.includes(':>>')) {
|
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
||||||
return rootElement.querySelectorAll(selector);
|
if (isXPathSelector(selector)) {
|
||||||
|
return evaluateXPathAll(rootElement, selector);
|
||||||
|
} else {
|
||||||
|
return Array.from(rootElement.querySelectorAll(selector));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
|
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
||||||
let currentElements = [rootElement];
|
let currentElements = [rootElement];
|
||||||
|
|
||||||
for (const part of parts) {
|
for (const part of parts) {
|
||||||
@@ -486,30 +595,64 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
|
|
||||||
for (const element of currentElements) {
|
for (const element of currentElements) {
|
||||||
// Handle iframe and frame traversal
|
// Handle iframe and frame traversal
|
||||||
if (element.tagName === 'IFRAME' || element.tagName === 'FRAME') {
|
if (element.tagName === "IFRAME" || element.tagName === "FRAME") {
|
||||||
try {
|
try {
|
||||||
const frameDoc = element.contentDocument || element.contentWindow.document;
|
const frameDoc =
|
||||||
nextElements.push(...frameDoc.querySelectorAll(part));
|
element.contentDocument || element.contentWindow.document;
|
||||||
|
if (frameDoc) {
|
||||||
|
if (isXPathSelector(part)) {
|
||||||
|
nextElements.push(...evaluateXPathAll(frameDoc, part));
|
||||||
|
} else {
|
||||||
|
nextElements.push(
|
||||||
|
...Array.from(frameDoc.querySelectorAll(part))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn(`Cannot access ${element.tagName.toLowerCase()} content:`, e);
|
console.warn(
|
||||||
|
`Cannot access ${element.tagName.toLowerCase()} content:`,
|
||||||
|
e
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Regular DOM elements
|
// Regular DOM elements
|
||||||
if (element.querySelectorAll) {
|
if (element.querySelectorAll) {
|
||||||
nextElements.push(...element.querySelectorAll(part));
|
if (isXPathSelector(part)) {
|
||||||
|
nextElements.push(...evaluateXPathAll(element, part));
|
||||||
|
} else {
|
||||||
|
nextElements.push(
|
||||||
|
...Array.from(element.querySelectorAll(part))
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shadow DOM elements
|
// Shadow DOM elements
|
||||||
if (element.shadowRoot) {
|
if (element.shadowRoot) {
|
||||||
nextElements.push(...element.shadowRoot.querySelectorAll(part));
|
if (isXPathSelector(part)) {
|
||||||
|
nextElements.push(
|
||||||
|
...evaluateXPathAll(element.shadowRoot, part)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
nextElements.push(
|
||||||
|
...Array.from(element.shadowRoot.querySelectorAll(part))
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check children's shadow roots
|
// Check children's shadow roots
|
||||||
const children = Array.from(element.children || []);
|
const children = Array.from(element.children || []);
|
||||||
for (const child of children) {
|
for (const child of children) {
|
||||||
if (child.shadowRoot) {
|
if (child.shadowRoot) {
|
||||||
nextElements.push(...child.shadowRoot.querySelectorAll(part));
|
if (isXPathSelector(part)) {
|
||||||
|
nextElements.push(
|
||||||
|
...evaluateXPathAll(child.shadowRoot, part)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
nextElements.push(
|
||||||
|
...Array.from(child.shadowRoot.querySelectorAll(part))
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -522,11 +665,12 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Enhanced value extraction with context awareness
|
// Enhanced value extraction with context awareness
|
||||||
function extractValue(element, attribute) {
|
const extractValue = (element, attribute) => {
|
||||||
if (!element) return null;
|
if (!element) return null;
|
||||||
|
|
||||||
// Get context-aware base URL
|
// Get context-aware base URL
|
||||||
const baseURL = element.ownerDocument?.location?.href || window.location.origin;
|
const baseURL =
|
||||||
|
element.ownerDocument?.location?.href || window.location.origin;
|
||||||
|
|
||||||
// Check shadow root first
|
// Check shadow root first
|
||||||
if (element.shadowRoot) {
|
if (element.shadowRoot) {
|
||||||
@@ -536,15 +680,37 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attribute === 'innerText') {
|
if (attribute === "innerText") {
|
||||||
return element.innerText.trim();
|
// First try standard innerText/textContent
|
||||||
} else if (attribute === 'innerHTML') {
|
let textContent =
|
||||||
return element.innerHTML.trim();
|
element.innerText?.trim() || element.textContent?.trim();
|
||||||
} else if (attribute === 'src' || attribute === 'href') {
|
|
||||||
if (attribute === 'href' && element.tagName !== 'A') {
|
// If empty, check for common data attributes that might contain the text
|
||||||
|
if (!textContent) {
|
||||||
|
const dataAttributes = [
|
||||||
|
"data-600",
|
||||||
|
"data-text",
|
||||||
|
"data-label",
|
||||||
|
"data-value",
|
||||||
|
"data-content",
|
||||||
|
];
|
||||||
|
for (const attr of dataAttributes) {
|
||||||
|
const dataValue = element.getAttribute(attr);
|
||||||
|
if (dataValue && dataValue.trim()) {
|
||||||
|
textContent = dataValue.trim();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return textContent || null;
|
||||||
|
} else if (attribute === "innerHTML") {
|
||||||
|
return element.innerHTML?.trim() || null;
|
||||||
|
} else if (attribute === "src" || attribute === "href") {
|
||||||
|
if (attribute === "href" && element.tagName !== "A") {
|
||||||
const parentElement = element.parentElement;
|
const parentElement = element.parentElement;
|
||||||
if (parentElement && parentElement.tagName === 'A') {
|
if (parentElement && parentElement.tagName === "A") {
|
||||||
const parentHref = parentElement.getAttribute('href');
|
const parentHref = parentElement.getAttribute("href");
|
||||||
if (parentHref) {
|
if (parentHref) {
|
||||||
try {
|
try {
|
||||||
return new URL(parentHref, baseURL).href;
|
return new URL(parentHref, baseURL).href;
|
||||||
@@ -556,13 +722,13 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const attrValue = element.getAttribute(attribute);
|
const attrValue = element.getAttribute(attribute);
|
||||||
const dataAttr = attrValue || element.getAttribute('data-' + attribute);
|
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
||||||
|
|
||||||
if (!dataAttr || dataAttr.trim() === '') {
|
if (!dataAttr || dataAttr.trim() === "") {
|
||||||
if (attribute === 'src') {
|
if (attribute === "src") {
|
||||||
const style = window.getComputedStyle(element);
|
const style = window.getComputedStyle(element);
|
||||||
const bgImage = style.backgroundImage;
|
const bgImage = style.backgroundImage;
|
||||||
if (bgImage && bgImage !== 'none') {
|
if (bgImage && bgImage !== "none") {
|
||||||
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
||||||
return matches ? new URL(matches[1], baseURL).href : null;
|
return matches ? new URL(matches[1], baseURL).href : null;
|
||||||
}
|
}
|
||||||
@@ -573,15 +739,15 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
try {
|
try {
|
||||||
return new URL(dataAttr, baseURL).href;
|
return new URL(dataAttr, baseURL).href;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn('Error creating URL from', dataAttr, e);
|
console.warn("Error creating URL from", dataAttr, e);
|
||||||
return dataAttr; // Return the original value if URL construction fails
|
return dataAttr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return element.getAttribute(attribute);
|
return element.getAttribute(attribute);
|
||||||
}
|
};
|
||||||
|
|
||||||
// Enhanced table ancestor finding with context support
|
// Enhanced table ancestor finding with context support
|
||||||
function findTableAncestor(element) {
|
const findTableAncestor = (element) => {
|
||||||
let currentElement = element;
|
let currentElement = element;
|
||||||
const MAX_DEPTH = 5;
|
const MAX_DEPTH = 5;
|
||||||
let depth = 0;
|
let depth = 0;
|
||||||
@@ -593,14 +759,17 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (currentElement.tagName === 'TD') {
|
if (currentElement.tagName === "TD") {
|
||||||
return { type: 'TD', element: currentElement };
|
return { type: "TD", element: currentElement };
|
||||||
} else if (currentElement.tagName === 'TR') {
|
} else if (currentElement.tagName === "TR") {
|
||||||
return { type: 'TR', element: currentElement };
|
return { type: "TR", element: currentElement };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle iframe and frame crossing
|
// Handle iframe and frame crossing
|
||||||
if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') {
|
if (
|
||||||
|
currentElement.tagName === "IFRAME" ||
|
||||||
|
currentElement.tagName === "FRAME"
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
currentElement = currentElement.contentDocument.body;
|
currentElement = currentElement.contentDocument.body;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -612,26 +781,26 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
depth++;
|
depth++;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
};
|
||||||
|
|
||||||
// Helper function to get cell index
|
// Helper function to get cell index
|
||||||
/**
 * Compute the positional index of a table cell.
 *
 * When the cell's root node is a ShadowRoot, the index is the cell's position
 * among all `td` elements found in that shadow root. Otherwise it is the
 * number of element siblings that precede the cell.
 *
 * @param {Element} td - The cell to locate.
 * @returns {number} Zero-based index of the cell.
 */
const getCellIndex = (td) => {
  const root = td.getRootNode();
  if (root instanceof ShadowRoot) {
    return Array.from(root.querySelectorAll("td")).indexOf(td);
  }

  let precedingCount = 0;
  for (
    let cursor = td.previousElementSibling;
    cursor;
    cursor = cursor.previousElementSibling
  ) {
    precedingCount += 1;
  }
  return precedingCount;
};
|
||||||
|
|
||||||
// Helper function to check for TH elements
|
// Helper function to check for TH elements
|
||||||
function hasThElement(row, tableFields) {
|
const hasThElement = (row, tableFields) => {
|
||||||
for (const [_, { selector }] of Object.entries(tableFields)) {
|
for (const [_, { selector }] of Object.entries(tableFields)) {
|
||||||
const element = queryElement(row, selector);
|
const element = queryElement(row, selector);
|
||||||
if (element) {
|
if (element) {
|
||||||
@@ -642,9 +811,9 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current.tagName === 'TH') return true;
|
if (current.tagName === "TH") return true;
|
||||||
|
|
||||||
if (current.tagName === 'IFRAME' || current.tagName === 'FRAME') {
|
if (current.tagName === "IFRAME" || current.tagName === "FRAME") {
|
||||||
try {
|
try {
|
||||||
current = current.contentDocument.body;
|
current = current.contentDocument.body;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -657,35 +826,35 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
};
|
||||||
|
|
||||||
// Helper function to filter rows
|
// Helper function to filter rows
|
||||||
/**
 * Decide which table rows take part in extraction.
 *
 * If `hasThElement` reports true for any row (given the configured table
 * fields), the rows are returned untouched. Otherwise every row that
 * contains a TH, either as a descendant or inside the row's own shadow
 * root, is dropped.
 *
 * @param {Array<Element>} rows - Candidate rows.
 * @param {Object} tableFields - Field definitions forwarded to hasThElement.
 * @returns {Array<Element>} The original rows, or the rows without TH cells.
 */
const filterRowsBasedOnTag = (rows, tableFields) => {
  for (const candidate of rows) {
    if (hasThElement(candidate, tableFields)) return rows;
  }

  const lacksHeaderCell = (row) => {
    if (row.getElementsByTagName("TH").length !== 0) {
      return false;
    }
    const shadow = row.shadowRoot;
    if (!shadow) {
      return true;
    }
    return shadow.querySelector("th") === null;
  };

  return rows.filter(lacksHeaderCell);
};
|
||||||
|
|
||||||
// Class similarity comparison functions
|
// Class similarity comparison functions
|
||||||
/**
 * Jaccard similarity between two collections of class names:
 * |intersection| / |union| after de-duplicating each side.
 *
 * @param {Iterable<string>} classList1 - First set of class names.
 * @param {Iterable<string>} classList2 - Second set of class names.
 * @returns {number} A value in [0, 1]. Two empty inputs yield 0 rather than
 *   NaN (0/0), so callers can safely compare the result against a threshold.
 */
const calculateClassSimilarity = (classList1, classList2) => {
  const first = new Set(classList1);
  const second = new Set(classList2);

  let shared = 0;
  for (const name of first) {
    if (second.has(name)) shared += 1;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|; no need to materialise the union.
  const unionSize = first.size + second.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
};
|
||||||
|
|
||||||
// Enhanced similar elements finding with context support
|
// Enhanced similar elements finding with context support
|
||||||
function findSimilarElements(baseElement, similarityThreshold = 0.7) {
|
const findSimilarElements = (baseElement, similarityThreshold = 0.7) => {
|
||||||
const baseClasses = Array.from(baseElement.classList);
|
const baseClasses = Array.from(baseElement.classList);
|
||||||
if (baseClasses.length === 0) return [];
|
if (baseClasses.length === 0) return [];
|
||||||
|
|
||||||
@@ -697,25 +866,33 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
// Get elements from shadow DOM
|
// Get elements from shadow DOM
|
||||||
if (baseElement.getRootNode() instanceof ShadowRoot) {
|
if (baseElement.getRootNode() instanceof ShadowRoot) {
|
||||||
const shadowHost = baseElement.getRootNode().host;
|
const shadowHost = baseElement.getRootNode().host;
|
||||||
allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName));
|
allElements.push(
|
||||||
|
...shadowHost.getElementsByTagName(baseElement.tagName)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get elements from iframes and frames
|
// Get elements from iframes and frames
|
||||||
const frames = [
|
const frames = [
|
||||||
...Array.from(document.getElementsByTagName('iframe')),
|
...Array.from(document.getElementsByTagName("iframe")),
|
||||||
...Array.from(document.getElementsByTagName('frame'))
|
...Array.from(document.getElementsByTagName("frame")),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const frame of frames) {
|
for (const frame of frames) {
|
||||||
try {
|
try {
|
||||||
const frameDoc = frame.contentDocument || frame.contentWindow.document;
|
const frameDoc =
|
||||||
allElements.push(...frameDoc.getElementsByTagName(baseElement.tagName));
|
frame.contentDocument || frame.contentWindow.document;
|
||||||
|
allElements.push(
|
||||||
|
...frameDoc.getElementsByTagName(baseElement.tagName)
|
||||||
|
);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e);
|
console.warn(
|
||||||
|
`Cannot access ${frame.tagName.toLowerCase()} content:`,
|
||||||
|
e
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return allElements.filter(element => {
|
return allElements.filter((element) => {
|
||||||
if (element === baseElement) return false;
|
if (element === baseElement) return false;
|
||||||
const similarity = calculateClassSimilarity(
|
const similarity = calculateClassSimilarity(
|
||||||
baseClasses,
|
baseClasses,
|
||||||
@@ -723,45 +900,92 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
);
|
);
|
||||||
return similarity >= similarityThreshold;
|
return similarity >= similarityThreshold;
|
||||||
});
|
});
|
||||||
}
|
};
|
||||||
|
|
||||||
/**
 * Look up a selector and, when it contains an `nth-child(N)` that matches
 * nothing, retry with progressively relaxed variants.
 *
 * Order of attempts:
 *   1. the selector as given;
 *   2. the first `nth-child(N)` rewritten to N-1, N-2, ... down to 1;
 *   3. the selector with its first `:nth-child(N)` removed.
 * Only the first `nth-child(...)` occurrence is ever rewritten.
 *
 * @param {Node} rootElement - Context handed to queryElement.
 * @param {string} originalSelector - Selector to resolve.
 * @returns {Element|null} The first element found by any attempt, otherwise
 *   the (falsy) result of the last attempt made.
 */
const tryFallbackSelector = (rootElement, originalSelector) => {
  const direct = queryElement(rootElement, originalSelector);
  if (direct || !originalSelector.includes("nth-child")) {
    return direct;
  }

  const nthMatch = originalSelector.match(/nth-child\((\d+)\)/);
  if (!nthMatch) {
    return direct;
  }

  // Walk the index downwards from N-1 to 1, stopping at the first hit.
  let candidateIndex = parseInt(nthMatch[1], 10) - 1;
  while (candidateIndex >= 1) {
    const relaxed = originalSelector.replace(
      /nth-child\(\d+\)/,
      `nth-child(${candidateIndex})`
    );
    const found = queryElement(rootElement, relaxed);
    if (found) {
      return found;
    }
    candidateIndex -= 1;
  }

  // Last resort: drop the positional constraint altogether.
  const unconstrained = originalSelector.replace(/\:nth-child\(\d+\)/, "");
  return queryElement(rootElement, unconstrained);
};
|
||||||
|
|
||||||
|
/**
 * Build an XPath that targets a field inside one specific list container.
 *
 * The container is addressed as `(listSelector)[containerIndex]`.
 *   - If the list selector occurs in the child selector, its first
 *     occurrence is replaced by the indexed container expression.
 *   - Otherwise the child selector is appended to the indexed container,
 *     with its first "//" reduced to "/".
 *
 * The replacement is a literal splice, so `$&`, `$'` and similar sequences
 * inside a selector are not expanded. A child selector that merely contains
 * the list selector's text without the full list selector falls through to
 * the appended form instead of being returned unindexed.
 *
 * @param {string} childSelector - XPath of the field.
 * @param {string} listSelector - XPath that selects the list containers.
 * @param {number} containerIndex - 1-based position of the container.
 * @returns {string} XPath scoped to the requested container.
 */
const createIndexedXPath = (childSelector, listSelector, containerIndex) => {
  const indexedListSelector = `(${listSelector})[${containerIndex}]`;

  const matchStart = childSelector.indexOf(listSelector);
  if (matchStart !== -1) {
    return (
      childSelector.slice(0, matchStart) +
      indexedListSelector +
      childSelector.slice(matchStart + listSelector.length)
    );
  }

  // The child selector does not embed the list selector: scope it under the
  // indexed container instead.
  return `${indexedListSelector}${childSelector.replace("//", "/")}`;
};
|
||||||
|
|
||||||
|
// Main scraping logic with unified support for both CSS and XPath
|
||||||
|
console.log("🚀 Starting unified list data extraction");
|
||||||
|
console.log("List Selector:", listSelector);
|
||||||
|
console.log("Fields:", fields);
|
||||||
|
|
||||||
// Main scraping logic with context support
|
|
||||||
let containers = queryElementAll(document, listSelector);
|
let containers = queryElementAll(document, listSelector);
|
||||||
containers = Array.from(containers);
|
containers = Array.from(containers);
|
||||||
|
|
||||||
if (containers.length === 0) return [];
|
if (containers.length === 0) {
|
||||||
|
console.warn("❌ No containers found for listSelector:", listSelector);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
if (limit > 1 && containers.length === 1) {
|
console.log(`📦 Found ${containers.length} list containers`);
|
||||||
|
|
||||||
|
// For CSS selectors, try to find similar containers if needed
|
||||||
|
if (
|
||||||
|
!isXPathSelector(listSelector) &&
|
||||||
|
limit > 1 &&
|
||||||
|
containers.length === 1
|
||||||
|
) {
|
||||||
const baseContainer = containers[0];
|
const baseContainer = containers[0];
|
||||||
const similarContainers = findSimilarElements(baseContainer);
|
const similarContainers = findSimilarElements(baseContainer);
|
||||||
|
|
||||||
if (similarContainers.length > 0) {
|
if (similarContainers.length > 0) {
|
||||||
const newContainers = similarContainers.filter(container =>
|
const newContainers = similarContainers.filter(
|
||||||
!container.matches(listSelector)
|
(container) => !container.matches(listSelector)
|
||||||
);
|
);
|
||||||
containers = [...containers, ...newContainers];
|
containers = [...containers, ...newContainers];
|
||||||
}
|
}
|
||||||
@@ -769,10 +993,60 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
|
|
||||||
const containerFields = containers.map(() => ({
|
const containerFields = containers.map(() => ({
|
||||||
tableFields: {},
|
tableFields: {},
|
||||||
nonTableFields: {}
|
nonTableFields: {},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Classify fields
|
// For XPath selectors, use the new approach
|
||||||
|
if (isXPathSelector(listSelector)) {
|
||||||
|
const extractedData = [];
|
||||||
|
const containersToProcess = Math.min(containers.length, limit);
|
||||||
|
|
||||||
|
for (
|
||||||
|
let containerIndex = 0;
|
||||||
|
containerIndex < containersToProcess;
|
||||||
|
containerIndex++
|
||||||
|
) {
|
||||||
|
const record = {};
|
||||||
|
|
||||||
|
for (const [label, field] of Object.entries(fields)) {
|
||||||
|
let element = null;
|
||||||
|
|
||||||
|
if (isXPathSelector(field.selector)) {
|
||||||
|
// Create indexed absolute XPath
|
||||||
|
const indexedSelector = createIndexedXPath(
|
||||||
|
field.selector,
|
||||||
|
listSelector,
|
||||||
|
containerIndex + 1
|
||||||
|
);
|
||||||
|
element = evaluateXPath(document, indexedSelector);
|
||||||
|
} else {
|
||||||
|
// Fallback for CSS selectors within XPath containers
|
||||||
|
const container = containers[containerIndex];
|
||||||
|
element = queryElement(container, field.selector);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (element) {
|
||||||
|
const value = extractValue(element, field.attribute);
|
||||||
|
if (value !== null && value !== "") {
|
||||||
|
record[label] = value;
|
||||||
|
} else {
|
||||||
|
record[label] = "";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
record[label] = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Object.values(record).some((value) => value !== "")) {
|
||||||
|
extractedData.push(record);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`📊 Total records extracted: ${extractedData.length}`);
|
||||||
|
return extractedData;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For CSS selectors, use the original table-aware approach
|
||||||
containers.forEach((container, containerIndex) => {
|
containers.forEach((container, containerIndex) => {
|
||||||
for (const [label, field] of Object.entries(fields)) {
|
for (const [label, field] of Object.entries(fields)) {
|
||||||
const sampleElement = queryElement(container, field.selector);
|
const sampleElement = queryElement(container, field.selector);
|
||||||
@@ -783,7 +1057,8 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
containerFields[containerIndex].tableFields[label] = {
|
containerFields[containerIndex].tableFields[label] = {
|
||||||
...field,
|
...field,
|
||||||
tableContext: ancestor.type,
|
tableContext: ancestor.type,
|
||||||
cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
|
cellIndex:
|
||||||
|
ancestor.type === "TD" ? getCellIndex(ancestor.element) : -1,
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
containerFields[containerIndex].nonTableFields[label] = field;
|
containerFields[containerIndex].nonTableFields[label] = field;
|
||||||
@@ -798,7 +1073,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
const nonTableData = [];
|
const nonTableData = [];
|
||||||
|
|
||||||
// Process table data with support for iframes, frames, and shadow DOM
|
// Process table data with support for iframes, frames, and shadow DOM
|
||||||
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
for (
|
||||||
|
let containerIndex = 0;
|
||||||
|
containerIndex < containers.length;
|
||||||
|
containerIndex++
|
||||||
|
) {
|
||||||
const container = containers[containerIndex];
|
const container = containers[containerIndex];
|
||||||
const { tableFields } = containerFields[containerIndex];
|
const { tableFields } = containerFields[containerIndex];
|
||||||
|
|
||||||
@@ -808,13 +1087,20 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
let tableContext = firstElement;
|
let tableContext = firstElement;
|
||||||
|
|
||||||
// Find table context including iframe, frame and shadow DOM
|
// Find table context including iframe, frame and shadow DOM
|
||||||
while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
|
while (
|
||||||
|
tableContext &&
|
||||||
|
tableContext.tagName !== "TABLE" &&
|
||||||
|
tableContext !== container
|
||||||
|
) {
|
||||||
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
||||||
tableContext = tableContext.getRootNode().host;
|
tableContext = tableContext.getRootNode().host;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
|
if (
|
||||||
|
tableContext.tagName === "IFRAME" ||
|
||||||
|
tableContext.tagName === "FRAME"
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
tableContext = tableContext.contentDocument.body;
|
tableContext = tableContext.contentDocument.body;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -830,30 +1116,45 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
const rows = [];
|
const rows = [];
|
||||||
|
|
||||||
// Get rows from regular DOM
|
// Get rows from regular DOM
|
||||||
rows.push(...tableContext.getElementsByTagName('TR'));
|
rows.push(...tableContext.getElementsByTagName("TR"));
|
||||||
|
|
||||||
// Get rows from shadow DOM
|
// Get rows from shadow DOM
|
||||||
if (tableContext.shadowRoot) {
|
if (tableContext.shadowRoot) {
|
||||||
rows.push(...tableContext.shadowRoot.getElementsByTagName('TR'));
|
rows.push(...tableContext.shadowRoot.getElementsByTagName("TR"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get rows from iframes and frames
|
// Get rows from iframes and frames
|
||||||
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
|
if (
|
||||||
|
tableContext.tagName === "IFRAME" ||
|
||||||
|
tableContext.tagName === "FRAME"
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
const frameDoc = tableContext.contentDocument || tableContext.contentWindow.document;
|
const frameDoc =
|
||||||
rows.push(...frameDoc.getElementsByTagName('TR'));
|
tableContext.contentDocument ||
|
||||||
|
tableContext.contentWindow.document;
|
||||||
|
rows.push(...frameDoc.getElementsByTagName("TR"));
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e);
|
console.warn(
|
||||||
|
`Cannot access ${tableContext.tagName.toLowerCase()} rows:`,
|
||||||
|
e
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const processedRows = filterRowsBasedOnTag(rows, tableFields);
|
const processedRows = filterRowsBasedOnTag(rows, tableFields);
|
||||||
|
|
||||||
for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
|
for (
|
||||||
|
let rowIndex = 0;
|
||||||
|
rowIndex < Math.min(processedRows.length, limit);
|
||||||
|
rowIndex++
|
||||||
|
) {
|
||||||
const record = {};
|
const record = {};
|
||||||
const currentRow = processedRows[rowIndex];
|
const currentRow = processedRows[rowIndex];
|
||||||
|
|
||||||
for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
|
for (const [
|
||||||
|
label,
|
||||||
|
{ selector, attribute, cellIndex },
|
||||||
|
] of Object.entries(tableFields)) {
|
||||||
let element = null;
|
let element = null;
|
||||||
|
|
||||||
if (cellIndex >= 0) {
|
if (cellIndex >= 0) {
|
||||||
@@ -871,18 +1172,27 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
if (td) {
|
if (td) {
|
||||||
element = queryElement(td, selector);
|
element = queryElement(td, selector);
|
||||||
|
|
||||||
if (!element && selector.split(/(?:>>|:>>)/).pop().includes('td:nth-child')) {
|
if (
|
||||||
|
!element &&
|
||||||
|
selector
|
||||||
|
.split(/(?:>>|:>>)/)
|
||||||
|
.pop()
|
||||||
|
.includes("td:nth-child")
|
||||||
|
) {
|
||||||
element = td;
|
element = td;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!element) {
|
if (!element) {
|
||||||
const tagOnlySelector = selector.split('.')[0];
|
const tagOnlySelector = selector.split(".")[0];
|
||||||
element = queryElement(td, tagOnlySelector);
|
element = queryElement(td, tagOnlySelector);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!element) {
|
if (!element) {
|
||||||
let currentElement = td;
|
let currentElement = td;
|
||||||
while (currentElement && currentElement.children.length > 0) {
|
while (
|
||||||
|
currentElement &&
|
||||||
|
currentElement.children.length > 0
|
||||||
|
) {
|
||||||
let foundContentChild = false;
|
let foundContentChild = false;
|
||||||
for (const child of currentElement.children) {
|
for (const child of currentElement.children) {
|
||||||
if (extractValue(child, attribute)) {
|
if (extractValue(child, attribute)) {
|
||||||
@@ -914,7 +1224,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Process non-table data with all contexts support
|
// Process non-table data with all contexts support
|
||||||
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
for (
|
||||||
|
let containerIndex = 0;
|
||||||
|
containerIndex < containers.length;
|
||||||
|
containerIndex++
|
||||||
|
) {
|
||||||
if (nonTableData.length >= limit) break;
|
if (nonTableData.length >= limit) break;
|
||||||
|
|
||||||
const container = containers[containerIndex];
|
const container = containers[containerIndex];
|
||||||
@@ -923,7 +1237,9 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
if (Object.keys(nonTableFields).length > 0) {
|
if (Object.keys(nonTableFields).length > 0) {
|
||||||
const record = {};
|
const record = {};
|
||||||
|
|
||||||
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
|
for (const [label, { selector, attribute }] of Object.entries(
|
||||||
|
nonTableFields
|
||||||
|
)) {
|
||||||
// Get the last part of the selector after any context delimiter
|
// Get the last part of the selector after any context delimiter
|
||||||
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
||||||
const element = tryFallbackSelector(container, relativeSelector);
|
const element = tryFallbackSelector(container, relativeSelector);
|
||||||
@@ -941,6 +1257,8 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
|
|
||||||
// Merge and limit the results
|
// Merge and limit the results
|
||||||
const scrapedData = [...tableData, ...nonTableData];
|
const scrapedData = [...tableData, ...nonTableData];
|
||||||
|
console.log(`📊 Total records extracted: ${scrapedData.length}`);
|
||||||
|
|
||||||
return scrapedData;
|
return scrapedData;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1287,28 +1287,42 @@ export class RemoteBrowser {
|
|||||||
*/
|
*/
|
||||||
public registerEditorEvents = (): void => {
|
public registerEditorEvents = (): void => {
|
||||||
// For each event, include userId to make sure events are handled for the correct browser
|
// For each event, include userId to make sure events are handled for the correct browser
|
||||||
logger.log('debug', `Registering editor events for user: ${this.userId}`);
|
logger.log("debug", `Registering editor events for user: ${this.userId}`);
|
||||||
|
|
||||||
this.socket.on(`captureDirectScreenshot:${this.userId}`, async (settings) => {
|
this.socket.on(
|
||||||
logger.debug(`Direct screenshot capture requested for user ${this.userId}`);
|
`captureDirectScreenshot:${this.userId}`,
|
||||||
|
async (settings) => {
|
||||||
|
logger.debug(
|
||||||
|
`Direct screenshot capture requested for user ${this.userId}`
|
||||||
|
);
|
||||||
await this.captureDirectScreenshot(settings);
|
await this.captureDirectScreenshot(settings);
|
||||||
});
|
}
|
||||||
|
);
|
||||||
|
|
||||||
// For backward compatibility
|
// For backward compatibility
|
||||||
this.socket.on('captureDirectScreenshot', async (settings) => {
|
this.socket.on("captureDirectScreenshot", async (settings) => {
|
||||||
await this.captureDirectScreenshot(settings);
|
await this.captureDirectScreenshot(settings);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Listen for specific events for this user
|
// Listen for specific events for this user
|
||||||
this.socket.on(`rerender:${this.userId}`, async () => {
|
this.socket.on(`rerender:${this.userId}`, async () => {
|
||||||
logger.debug(`Rerender event received for user ${this.userId}`);
|
logger.debug(`Rerender event received for user ${this.userId}`);
|
||||||
|
if (this.renderingMode === "dom") {
|
||||||
|
await this.makeAndEmitDOMSnapshot();
|
||||||
|
} else {
|
||||||
await this.makeAndEmitScreenshot();
|
await this.makeAndEmitScreenshot();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// For backward compatibility, also listen to the general event
|
this.socket.on("rerender", async () => {
|
||||||
this.socket.on('rerender', async () => {
|
logger.debug(
|
||||||
logger.debug(`General rerender event received, checking if for user ${this.userId}`);
|
`General rerender event received, checking if for user ${this.userId}`
|
||||||
|
);
|
||||||
|
if (this.renderingMode === "dom") {
|
||||||
|
await this.makeAndEmitDOMSnapshot();
|
||||||
|
} else {
|
||||||
await this.makeAndEmitScreenshot();
|
await this.makeAndEmitScreenshot();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this.socket.on(`settings:${this.userId}`, (settings) => {
|
this.socket.on(`settings:${this.userId}`, (settings) => {
|
||||||
@@ -1317,19 +1331,25 @@ export class RemoteBrowser {
|
|||||||
});
|
});
|
||||||
|
|
||||||
this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => {
|
this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => {
|
||||||
logger.debug(`Tab change to ${tabIndex} requested for user ${this.userId}`);
|
logger.debug(
|
||||||
|
`Tab change to ${tabIndex} requested for user ${this.userId}`
|
||||||
|
);
|
||||||
await this.changeTab(tabIndex);
|
await this.changeTab(tabIndex);
|
||||||
});
|
});
|
||||||
|
|
||||||
this.socket.on(`addTab:${this.userId}`, async () => {
|
this.socket.on(`addTab:${this.userId}`, async () => {
|
||||||
logger.debug(`New tab requested for user ${this.userId}`);
|
logger.debug(`New tab requested for user ${this.userId}`);
|
||||||
await this.currentPage?.context().newPage();
|
await this.currentPage?.context().newPage();
|
||||||
const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0;
|
const lastTabIndex = this.currentPage
|
||||||
|
? this.currentPage.context().pages().length - 1
|
||||||
|
: 0;
|
||||||
await this.changeTab(lastTabIndex);
|
await this.changeTab(lastTabIndex);
|
||||||
});
|
});
|
||||||
|
|
||||||
this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => {
|
this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => {
|
||||||
logger.debug(`Close tab ${tabInfo.index} requested for user ${this.userId}`);
|
logger.debug(
|
||||||
|
`Close tab ${tabInfo.index} requested for user ${this.userId}`
|
||||||
|
);
|
||||||
const page = this.currentPage?.context().pages()[tabInfo.index];
|
const page = this.currentPage?.context().pages()[tabInfo.index];
|
||||||
if (page) {
|
if (page) {
|
||||||
if (tabInfo.isCurrent) {
|
if (tabInfo.isCurrent) {
|
||||||
@@ -1343,34 +1363,58 @@ export class RemoteBrowser {
|
|||||||
}
|
}
|
||||||
await page.close();
|
await page.close();
|
||||||
logger.log(
|
logger.log(
|
||||||
'debug',
|
"debug",
|
||||||
`Tab ${tabInfo.index} was closed for user ${this.userId}, new tab count: ${this.currentPage?.context().pages().length}`
|
`Tab ${tabInfo.index} was closed for user ${
|
||||||
|
this.userId
|
||||||
|
}, new tab count: ${this.currentPage?.context().pages().length}`
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
logger.log('error', `Tab index ${tabInfo.index} out of range for user ${this.userId}`);
|
logger.log(
|
||||||
|
"error",
|
||||||
|
`Tab index ${tabInfo.index} out of range for user ${this.userId}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this.socket.on(`setViewportSize:${this.userId}`, async (data: { width: number, height: number }) => {
|
this.socket.on(
|
||||||
|
`setViewportSize:${this.userId}`,
|
||||||
|
async (data: { width: number; height: number }) => {
|
||||||
const { width, height } = data;
|
const { width, height } = data;
|
||||||
logger.log('debug', `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`);
|
logger.log(
|
||||||
|
"debug",
|
||||||
|
`Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`
|
||||||
|
);
|
||||||
|
|
||||||
// Update the browser context's viewport dynamically
|
// Update the browser context's viewport dynamically
|
||||||
if (this.context && this.browser) {
|
if (this.context && this.browser) {
|
||||||
this.context = await this.browser.newContext({ viewport: { width, height } });
|
this.context = await this.browser.newContext({
|
||||||
logger.log('debug', `Viewport size updated to width=${width}, height=${height} for user ${this.userId}`);
|
viewport: { width, height },
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
logger.log(
|
||||||
|
"debug",
|
||||||
|
`Viewport size updated to width=${width}, height=${height} for user ${this.userId}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
// For backward compatibility, also register the standard events
|
// For backward compatibility, also register the standard events
|
||||||
this.socket.on('settings', (settings) => this.interpreterSettings = settings);
|
this.socket.on(
|
||||||
this.socket.on('changeTab', async (tabIndex) => await this.changeTab(tabIndex));
|
"settings",
|
||||||
this.socket.on('addTab', async () => {
|
(settings) => (this.interpreterSettings = settings)
|
||||||
|
);
|
||||||
|
this.socket.on(
|
||||||
|
"changeTab",
|
||||||
|
async (tabIndex) => await this.changeTab(tabIndex)
|
||||||
|
);
|
||||||
|
this.socket.on("addTab", async () => {
|
||||||
await this.currentPage?.context().newPage();
|
await this.currentPage?.context().newPage();
|
||||||
const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0;
|
const lastTabIndex = this.currentPage
|
||||||
|
? this.currentPage.context().pages().length - 1
|
||||||
|
: 0;
|
||||||
await this.changeTab(lastTabIndex);
|
await this.changeTab(lastTabIndex);
|
||||||
});
|
});
|
||||||
this.socket.on('closeTab', async (tabInfo) => {
|
this.socket.on("closeTab", async (tabInfo) => {
|
||||||
const page = this.currentPage?.context().pages()[tabInfo.index];
|
const page = this.currentPage?.context().pages()[tabInfo.index];
|
||||||
if (page) {
|
if (page) {
|
||||||
if (tabInfo.isCurrent) {
|
if (tabInfo.isCurrent) {
|
||||||
@@ -1383,18 +1427,25 @@ export class RemoteBrowser {
|
|||||||
await page.close();
|
await page.close();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
this.socket.on('setViewportSize', async (data: { width: number, height: number }) => {
|
this.socket.on(
|
||||||
|
"setViewportSize",
|
||||||
|
async (data: { width: number; height: number }) => {
|
||||||
const { width, height } = data;
|
const { width, height } = data;
|
||||||
if (this.context && this.browser) {
|
if (this.context && this.browser) {
|
||||||
this.context = await this.browser.newContext({ viewport: { width, height } });
|
this.context = await this.browser.newContext({
|
||||||
}
|
viewport: { width, height },
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
this.socket.on('extractListData', async (data: {
|
this.socket.on(
|
||||||
listSelector: string,
|
"extractListData",
|
||||||
fields: Record<string, any>,
|
async (data: {
|
||||||
currentListId: number,
|
listSelector: string;
|
||||||
pagination: any
|
fields: Record<string, any>;
|
||||||
|
currentListId: number;
|
||||||
|
pagination: any;
|
||||||
}) => {
|
}) => {
|
||||||
if (this.currentPage) {
|
if (this.currentPage) {
|
||||||
const extractedData = await this.extractListData(
|
const extractedData = await this.extractListData(
|
||||||
@@ -1403,12 +1454,13 @@ export class RemoteBrowser {
|
|||||||
data.fields
|
data.fields
|
||||||
);
|
);
|
||||||
|
|
||||||
this.socket.emit('listDataExtracted', {
|
this.socket.emit("listDataExtracted", {
|
||||||
currentListId: data.currentListId,
|
currentListId: data.currentListId,
|
||||||
data: extractedData
|
data: extractedData,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
|
);
|
||||||
};
|
};
|
||||||
/**
|
/**
|
||||||
* Subscribes the remote browser for a screencast session
|
* Subscribes the remote browser for a screencast session
|
||||||
@@ -1481,10 +1533,7 @@ export class RemoteBrowser {
|
|||||||
* CDP-based DOM snapshot creation using captured network resources
|
* CDP-based DOM snapshot creation using captured network resources
|
||||||
*/
|
*/
|
||||||
public async makeAndEmitDOMSnapshot(): Promise<void> {
|
public async makeAndEmitDOMSnapshot(): Promise<void> {
|
||||||
if (
|
if (!this.currentPage || !this.isDOMStreamingActive) {
|
||||||
!this.currentPage ||
|
|
||||||
!this.isDOMStreamingActive
|
|
||||||
) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1537,6 +1586,7 @@ export class RemoteBrowser {
|
|||||||
if (typeof window.rrwebSnapshot === "undefined") {
|
if (typeof window.rrwebSnapshot === "undefined") {
|
||||||
throw new Error("rrweb-snapshot library not available");
|
throw new Error("rrweb-snapshot library not available");
|
||||||
}
|
}
|
||||||
|
|
||||||
return window.rrwebSnapshot.snapshot(document, {
|
return window.rrwebSnapshot.snapshot(document, {
|
||||||
inlineImages: true,
|
inlineImages: true,
|
||||||
collectFonts: true,
|
collectFonts: true,
|
||||||
@@ -1557,10 +1607,12 @@ export class RemoteBrowser {
|
|||||||
this.emitRRWebSnapshot(enhancedSnapshot);
|
this.emitRRWebSnapshot(enhancedSnapshot);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Handle navigation context destruction gracefully
|
// Handle navigation context destruction gracefully
|
||||||
if (error instanceof Error &&
|
if (
|
||||||
|
error instanceof Error &&
|
||||||
(error.message.includes("Execution context was destroyed") ||
|
(error.message.includes("Execution context was destroyed") ||
|
||||||
error.message.includes("most likely because of a navigation") ||
|
error.message.includes("most likely because of a navigation") ||
|
||||||
error.message.includes("Target closed"))) {
|
error.message.includes("Target closed"))
|
||||||
|
) {
|
||||||
logger.debug("DOM snapshot skipped due to page navigation or closure");
|
logger.debug("DOM snapshot skipped due to page navigation or closure");
|
||||||
return; // Don't emit error for navigation - this is expected
|
return; // Don't emit error for navigation - this is expected
|
||||||
}
|
}
|
||||||
@@ -1772,6 +1824,7 @@ export class RemoteBrowser {
|
|||||||
const page = this.currentPage?.context().pages()[tabIndex];
|
const page = this.currentPage?.context().pages()[tabIndex];
|
||||||
if (page) {
|
if (page) {
|
||||||
await this.stopScreencast();
|
await this.stopScreencast();
|
||||||
|
await this.stopDOM();
|
||||||
this.currentPage = page;
|
this.currentPage = page;
|
||||||
|
|
||||||
await this.setupPageEventListeners(this.currentPage);
|
await this.setupPageEventListeners(this.currentPage);
|
||||||
@@ -1783,8 +1836,13 @@ export class RemoteBrowser {
|
|||||||
url: this.currentPage.url(),
|
url: this.currentPage.url(),
|
||||||
userId: this.userId
|
userId: this.userId
|
||||||
});
|
});
|
||||||
|
if (this.isDOMStreamingActive) {
|
||||||
|
await this.makeAndEmitDOMSnapshot();
|
||||||
|
await this.subscribeToDOM();
|
||||||
|
} else {
|
||||||
await this.makeAndEmitScreenshot();
|
await this.makeAndEmitScreenshot();
|
||||||
await this.subscribeToScreencast();
|
await this.subscribeToScreencast();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.log('error', `${tabIndex} index out of range of pages`)
|
logger.log('error', `${tabIndex} index out of range of pages`)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -464,7 +464,6 @@ export class WorkflowGenerator {
|
|||||||
public onClick = async (coordinates: Coordinates, page: Page) => {
|
public onClick = async (coordinates: Coordinates, page: Page) => {
|
||||||
let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
|
let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
|
||||||
const selector = await this.generateSelector(page, coordinates, ActionType.Click);
|
const selector = await this.generateSelector(page, coordinates, ActionType.Click);
|
||||||
console.log("COOORDINATES: ", coordinates);
|
|
||||||
logger.log('debug', `Element's selector: ${selector}`);
|
logger.log('debug', `Element's selector: ${selector}`);
|
||||||
|
|
||||||
const elementInfo = await getElementInformation(page, coordinates, '', false);
|
const elementInfo = await getElementInformation(page, coordinates, '', false);
|
||||||
@@ -999,6 +998,7 @@ export class WorkflowGenerator {
|
|||||||
rect,
|
rect,
|
||||||
selector: displaySelector,
|
selector: displaySelector,
|
||||||
elementInfo,
|
elementInfo,
|
||||||
|
isDOMMode: this.isDOMMode,
|
||||||
// Include shadow DOM specific information
|
// Include shadow DOM specific information
|
||||||
shadowInfo: elementInfo?.isShadowRoot ? {
|
shadowInfo: elementInfo?.isShadowRoot ? {
|
||||||
mode: elementInfo.shadowRootMode,
|
mode: elementInfo.shadowRootMode,
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import { useTranslation } from 'react-i18next';
|
|||||||
import { AuthContext } from '../../context/auth';
|
import { AuthContext } from '../../context/auth';
|
||||||
import { coordinateMapper } from '../../helpers/coordinateMapper';
|
import { coordinateMapper } from '../../helpers/coordinateMapper';
|
||||||
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
|
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
|
||||||
import { clientSelectorGenerator } from "../../helpers/clientSelectorGenerator";
|
import { clientSelectorGenerator, ElementFingerprint } from "../../helpers/clientSelectorGenerator";
|
||||||
import DatePicker from "../pickers/DatePicker";
|
import DatePicker from "../pickers/DatePicker";
|
||||||
import Dropdown from "../pickers/Dropdown";
|
import Dropdown from "../pickers/Dropdown";
|
||||||
import TimePicker from "../pickers/TimePicker";
|
import TimePicker from "../pickers/TimePicker";
|
||||||
@@ -147,15 +147,14 @@ export const BrowserWindow = () => {
|
|||||||
const { browserWidth, browserHeight } = useBrowserDimensionsStore();
|
const { browserWidth, browserHeight } = useBrowserDimensionsStore();
|
||||||
const [canvasRef, setCanvasReference] = useState<React.RefObject<HTMLCanvasElement> | undefined>(undefined);
|
const [canvasRef, setCanvasReference] = useState<React.RefObject<HTMLCanvasElement> | undefined>(undefined);
|
||||||
const [screenShot, setScreenShot] = useState<string>("");
|
const [screenShot, setScreenShot] = useState<string>("");
|
||||||
const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] } | null>(null);
|
const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[], groupElements?: Array<{ element: HTMLElement; rect: DOMRect } >} | null>(null);
|
||||||
const [showAttributeModal, setShowAttributeModal] = useState(false);
|
const [showAttributeModal, setShowAttributeModal] = useState(false);
|
||||||
const [attributeOptions, setAttributeOptions] = useState<AttributeOption[]>([]);
|
const [attributeOptions, setAttributeOptions] = useState<AttributeOption[]>([]);
|
||||||
const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null);
|
const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null);
|
||||||
const [currentListId, setCurrentListId] = useState<number | null>(null);
|
const [currentListId, setCurrentListId] = useState<number | null>(null);
|
||||||
const [viewportInfo, setViewportInfo] = useState<ViewportInfo>({ width: browserWidth, height: browserHeight });
|
const [viewportInfo, setViewportInfo] = useState<ViewportInfo>({ width: browserWidth, height: browserHeight });
|
||||||
const [isDOMMode, setIsDOMMode] = useState(false);
|
|
||||||
const [currentSnapshot, setCurrentSnapshot] = useState<ProcessedSnapshot | null>(null);
|
|
||||||
const [isLoading, setIsLoading] = useState(false);
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
|
const [cachedChildSelectors, setCachedChildSelectors] = useState<string[]>([]);
|
||||||
|
|
||||||
const [listSelector, setListSelector] = useState<string | null>(null);
|
const [listSelector, setListSelector] = useState<string | null>(null);
|
||||||
const [fields, setFields] = useState<Record<string, TextStep>>({});
|
const [fields, setFields] = useState<Record<string, TextStep>>({});
|
||||||
@@ -164,10 +163,16 @@ export const BrowserWindow = () => {
|
|||||||
const highlighterUpdateRef = useRef<number>(0);
|
const highlighterUpdateRef = useRef<number>(0);
|
||||||
|
|
||||||
const { socket } = useSocketStore();
|
const { socket } = useSocketStore();
|
||||||
const { notify, currentTextActionId, currentListActionId } = useGlobalInfoStore();
|
const { notify, currentTextActionId, currentListActionId, updateDOMMode, isDOMMode, currentSnapshot } = useGlobalInfoStore();
|
||||||
const { getText, getList, paginationMode, paginationType, limitMode, captureStage } = useActionContext();
|
const { getText, getList, paginationMode, paginationType, limitMode, captureStage } = useActionContext();
|
||||||
const { addTextStep, addListStep, updateListStepData } = useBrowserSteps();
|
const { addTextStep, addListStep, updateListStepData } = useBrowserSteps();
|
||||||
|
|
||||||
|
const [currentGroupInfo, setCurrentGroupInfo] = useState<{
|
||||||
|
isGroupElement: boolean;
|
||||||
|
groupSize: number;
|
||||||
|
groupElements: HTMLElement[];
|
||||||
|
} | null>(null);
|
||||||
|
|
||||||
const { state } = useContext(AuthContext);
|
const { state } = useContext(AuthContext);
|
||||||
const { user } = state;
|
const { user } = state;
|
||||||
|
|
||||||
@@ -243,51 +248,47 @@ export const BrowserWindow = () => {
|
|||||||
(data: RRWebDOMCastData) => {
|
(data: RRWebDOMCastData) => {
|
||||||
if (!data.userId || data.userId === user?.id) {
|
if (!data.userId || data.userId === user?.id) {
|
||||||
if (data.snapshotData && data.snapshotData.snapshot) {
|
if (data.snapshotData && data.snapshotData.snapshot) {
|
||||||
setCurrentSnapshot(data.snapshotData);
|
updateDOMMode(true, data.snapshotData);
|
||||||
setIsDOMMode(true);
|
|
||||||
socket?.emit("dom-mode-enabled");
|
socket?.emit("dom-mode-enabled");
|
||||||
|
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
} else {
|
} else {
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[user?.id, socket]
|
[user?.id, socket, updateDOMMode]
|
||||||
);
|
);
|
||||||
|
|
||||||
const domModeHandler = useCallback(
|
const domModeHandler = useCallback(
|
||||||
(data: any) => {
|
(data: any) => {
|
||||||
if (!data.userId || data.userId === user?.id) {
|
if (!data.userId || data.userId === user?.id) {
|
||||||
setIsDOMMode(true);
|
updateDOMMode(true);
|
||||||
socket?.emit("dom-mode-enabled");
|
socket?.emit("dom-mode-enabled");
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[user?.id, socket]
|
[user?.id, socket, updateDOMMode]
|
||||||
);
|
);
|
||||||
|
|
||||||
const screenshotModeHandler = useCallback(
|
const screenshotModeHandler = useCallback(
|
||||||
(data: any) => {
|
(data: any) => {
|
||||||
if (!data.userId || data.userId === user?.id) {
|
if (!data.userId || data.userId === user?.id) {
|
||||||
setIsDOMMode(false);
|
updateDOMMode(false);
|
||||||
socket?.emit("screenshot-mode-enabled");
|
socket?.emit("screenshot-mode-enabled");
|
||||||
setCurrentSnapshot(null);
|
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[user?.id]
|
[user?.id, updateDOMMode]
|
||||||
);
|
);
|
||||||
|
|
||||||
const domModeErrorHandler = useCallback(
|
const domModeErrorHandler = useCallback(
|
||||||
(data: any) => {
|
(data: any) => {
|
||||||
if (!data.userId || data.userId === user?.id) {
|
if (!data.userId || data.userId === user?.id) {
|
||||||
setIsDOMMode(false);
|
updateDOMMode(false);
|
||||||
setCurrentSnapshot(null);
|
|
||||||
setIsLoading(false);
|
setIsLoading(false);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[user?.id]
|
[user?.id, updateDOMMode]
|
||||||
);
|
);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -304,8 +305,23 @@ export const BrowserWindow = () => {
|
|||||||
socket?.emit("listSelector", { selector: listSelector });
|
socket?.emit("listSelector", { selector: listSelector });
|
||||||
|
|
||||||
clientSelectorGenerator.setListSelector(listSelector);
|
clientSelectorGenerator.setListSelector(listSelector);
|
||||||
|
|
||||||
|
setCachedChildSelectors([]);
|
||||||
|
|
||||||
|
if (currentSnapshot) {
|
||||||
|
const iframeElement = document.querySelector(
|
||||||
|
"#dom-browser-iframe"
|
||||||
|
) as HTMLIFrameElement;
|
||||||
|
if (iframeElement?.contentDocument) {
|
||||||
|
const childSelectors = clientSelectorGenerator.getChildSelectors(
|
||||||
|
iframeElement.contentDocument,
|
||||||
|
listSelector
|
||||||
|
);
|
||||||
|
setCachedChildSelectors(childSelectors);
|
||||||
}
|
}
|
||||||
}, [isDOMMode, listSelector, socket, getList]);
|
}
|
||||||
|
}
|
||||||
|
}, [isDOMMode, listSelector, socket, getList, currentSnapshot]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
coordinateMapper.updateDimensions(dimensions.width, dimensions.height, viewportInfo.width, viewportInfo.height);
|
coordinateMapper.updateDimensions(dimensions.width, dimensions.height, viewportInfo.width, viewportInfo.height);
|
||||||
@@ -345,6 +361,7 @@ export const BrowserWindow = () => {
|
|||||||
setListSelector(null);
|
setListSelector(null);
|
||||||
setFields({});
|
setFields({});
|
||||||
setCurrentListId(null);
|
setCurrentListId(null);
|
||||||
|
setCachedChildSelectors([]);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -372,7 +389,7 @@ export const BrowserWindow = () => {
|
|||||||
socket.on("screencast", screencastHandler);
|
socket.on("screencast", screencastHandler);
|
||||||
socket.on("domcast", rrwebSnapshotHandler);
|
socket.on("domcast", rrwebSnapshotHandler);
|
||||||
socket.on("dom-mode-enabled", domModeHandler);
|
socket.on("dom-mode-enabled", domModeHandler);
|
||||||
socket.on("screenshot-mode-enabled", screenshotModeHandler);
|
// socket.on("screenshot-mode-enabled", screenshotModeHandler);
|
||||||
socket.on("dom-mode-error", domModeErrorHandler);
|
socket.on("dom-mode-error", domModeErrorHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -386,7 +403,7 @@ export const BrowserWindow = () => {
|
|||||||
socket.off("screencast", screencastHandler);
|
socket.off("screencast", screencastHandler);
|
||||||
socket.off("domcast", rrwebSnapshotHandler);
|
socket.off("domcast", rrwebSnapshotHandler);
|
||||||
socket.off("dom-mode-enabled", domModeHandler);
|
socket.off("dom-mode-enabled", domModeHandler);
|
||||||
socket.off("screenshot-mode-enabled", screenshotModeHandler);
|
// socket.off("screenshot-mode-enabled", screenshotModeHandler);
|
||||||
socket.off("dom-mode-error", domModeErrorHandler);
|
socket.off("dom-mode-error", domModeErrorHandler);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -398,7 +415,7 @@ export const BrowserWindow = () => {
|
|||||||
screencastHandler,
|
screencastHandler,
|
||||||
rrwebSnapshotHandler,
|
rrwebSnapshotHandler,
|
||||||
domModeHandler,
|
domModeHandler,
|
||||||
screenshotModeHandler,
|
// screenshotModeHandler,
|
||||||
domModeErrorHandler,
|
domModeErrorHandler,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -408,8 +425,19 @@ export const BrowserWindow = () => {
|
|||||||
selector: string;
|
selector: string;
|
||||||
elementInfo: ElementInfo | null;
|
elementInfo: ElementInfo | null;
|
||||||
childSelectors?: string[];
|
childSelectors?: string[];
|
||||||
|
groupInfo?: {
|
||||||
|
isGroupElement: boolean;
|
||||||
|
groupSize: number;
|
||||||
|
groupElements: HTMLElement[];
|
||||||
|
groupFingerprint: ElementFingerprint;
|
||||||
|
};
|
||||||
isDOMMode?: boolean;
|
isDOMMode?: boolean;
|
||||||
}) => {
|
}) => {
|
||||||
|
if (!getText && !getList) {
|
||||||
|
setHighlighterData(null);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isDOMMode || !currentSnapshot) {
|
if (!isDOMMode || !currentSnapshot) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -424,15 +452,6 @@ export const BrowserWindow = () => {
|
|||||||
) as HTMLIFrameElement;
|
) as HTMLIFrameElement;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!iframeElement) {
|
|
||||||
const browserWindow = document.querySelector("#browser-window");
|
|
||||||
if (browserWindow) {
|
|
||||||
iframeElement = browserWindow.querySelector(
|
|
||||||
"iframe"
|
|
||||||
) as HTMLIFrameElement;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!iframeElement) {
|
if (!iframeElement) {
|
||||||
console.error("Could not find iframe element for DOM highlighting");
|
console.error("Could not find iframe element for DOM highlighting");
|
||||||
return;
|
return;
|
||||||
@@ -441,6 +460,12 @@ export const BrowserWindow = () => {
|
|||||||
const iframeRect = iframeElement.getBoundingClientRect();
|
const iframeRect = iframeElement.getBoundingClientRect();
|
||||||
const IFRAME_BODY_PADDING = 16;
|
const IFRAME_BODY_PADDING = 16;
|
||||||
|
|
||||||
|
if (data.groupInfo) {
|
||||||
|
setCurrentGroupInfo(data.groupInfo);
|
||||||
|
} else {
|
||||||
|
setCurrentGroupInfo(null);
|
||||||
|
}
|
||||||
|
|
||||||
const absoluteRect = new DOMRect(
|
const absoluteRect = new DOMRect(
|
||||||
data.rect.x + iframeRect.left - IFRAME_BODY_PADDING,
|
data.rect.x + iframeRect.left - IFRAME_BODY_PADDING,
|
||||||
data.rect.y + iframeRect.top - IFRAME_BODY_PADDING,
|
data.rect.y + iframeRect.top - IFRAME_BODY_PADDING,
|
||||||
@@ -451,12 +476,36 @@ export const BrowserWindow = () => {
|
|||||||
const mappedData = {
|
const mappedData = {
|
||||||
...data,
|
...data,
|
||||||
rect: absoluteRect,
|
rect: absoluteRect,
|
||||||
|
childSelectors: data.childSelectors || cachedChildSelectors,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (getList === true) {
|
if (getList === true) {
|
||||||
if (listSelector) {
|
if (!listSelector && data.groupInfo?.isGroupElement) {
|
||||||
socket?.emit("listSelector", { selector: listSelector });
|
const updatedGroupElements = data.groupInfo.groupElements.map(
|
||||||
const hasValidChildSelectors =
|
(element) => {
|
||||||
|
const elementRect = element.getBoundingClientRect();
|
||||||
|
return {
|
||||||
|
element,
|
||||||
|
rect: new DOMRect(
|
||||||
|
elementRect.x + iframeRect.left - IFRAME_BODY_PADDING,
|
||||||
|
elementRect.y + iframeRect.top - IFRAME_BODY_PADDING,
|
||||||
|
elementRect.width,
|
||||||
|
elementRect.height
|
||||||
|
),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const mappedData = {
|
||||||
|
...data,
|
||||||
|
rect: absoluteRect,
|
||||||
|
groupElements: updatedGroupElements,
|
||||||
|
childSelectors: data.childSelectors || cachedChildSelectors,
|
||||||
|
};
|
||||||
|
|
||||||
|
setHighlighterData(mappedData);
|
||||||
|
} else if (listSelector) {
|
||||||
|
const hasChildSelectors =
|
||||||
Array.isArray(mappedData.childSelectors) &&
|
Array.isArray(mappedData.childSelectors) &&
|
||||||
mappedData.childSelectors.length > 0;
|
mappedData.childSelectors.length > 0;
|
||||||
|
|
||||||
@@ -471,62 +520,8 @@ export const BrowserWindow = () => {
|
|||||||
} else {
|
} else {
|
||||||
setHighlighterData(null);
|
setHighlighterData(null);
|
||||||
}
|
}
|
||||||
} else if (
|
} else if (hasChildSelectors) {
|
||||||
mappedData.childSelectors &&
|
|
||||||
mappedData.childSelectors.includes(mappedData.selector)
|
|
||||||
) {
|
|
||||||
setHighlighterData(mappedData);
|
setHighlighterData(mappedData);
|
||||||
} else if (
|
|
||||||
mappedData.elementInfo?.isIframeContent &&
|
|
||||||
mappedData.childSelectors
|
|
||||||
) {
|
|
||||||
const isIframeChild = mappedData.childSelectors.some(
|
|
||||||
(childSelector) =>
|
|
||||||
mappedData.selector.includes(":>>") &&
|
|
||||||
childSelector
|
|
||||||
.split(":>>")
|
|
||||||
.some((part) => mappedData.selector.includes(part.trim()))
|
|
||||||
);
|
|
||||||
setHighlighterData(isIframeChild ? mappedData : null);
|
|
||||||
} else if (
|
|
||||||
mappedData.selector.includes(":>>") &&
|
|
||||||
hasValidChildSelectors
|
|
||||||
) {
|
|
||||||
const selectorParts = mappedData.selector
|
|
||||||
.split(":>>")
|
|
||||||
.map((part) => part.trim());
|
|
||||||
const isValidMixedSelector = selectorParts.some((part) =>
|
|
||||||
mappedData.childSelectors!.some((childSelector) =>
|
|
||||||
childSelector.includes(part)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
setHighlighterData(isValidMixedSelector ? mappedData : null);
|
|
||||||
} else if (
|
|
||||||
mappedData.elementInfo?.isShadowRoot &&
|
|
||||||
mappedData.childSelectors
|
|
||||||
) {
|
|
||||||
const isShadowChild = mappedData.childSelectors.some(
|
|
||||||
(childSelector) =>
|
|
||||||
mappedData.selector.includes(">>") &&
|
|
||||||
childSelector
|
|
||||||
.split(">>")
|
|
||||||
.some((part) => mappedData.selector.includes(part.trim()))
|
|
||||||
);
|
|
||||||
setHighlighterData(isShadowChild ? mappedData : null);
|
|
||||||
} else if (
|
|
||||||
mappedData.selector.includes(">>") &&
|
|
||||||
hasValidChildSelectors
|
|
||||||
) {
|
|
||||||
const selectorParts = mappedData.selector
|
|
||||||
.split(">>")
|
|
||||||
.map((part) => part.trim());
|
|
||||||
const isValidMixedSelector = selectorParts.some((part) =>
|
|
||||||
mappedData.childSelectors!.some((childSelector) =>
|
|
||||||
childSelector.includes(part)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
setHighlighterData(isValidMixedSelector ? mappedData : null);
|
|
||||||
} else {
|
} else {
|
||||||
setHighlighterData(null);
|
setHighlighterData(null);
|
||||||
}
|
}
|
||||||
@@ -534,23 +529,29 @@ export const BrowserWindow = () => {
|
|||||||
setHighlighterData(mappedData);
|
setHighlighterData(mappedData);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// getText mode
|
|
||||||
setHighlighterData(mappedData);
|
setHighlighterData(mappedData);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[
|
[
|
||||||
isDOMMode,
|
isDOMMode,
|
||||||
currentSnapshot,
|
currentSnapshot,
|
||||||
|
getText,
|
||||||
getList,
|
getList,
|
||||||
socket,
|
socket,
|
||||||
listSelector,
|
listSelector,
|
||||||
paginationMode,
|
paginationMode,
|
||||||
paginationType,
|
paginationType,
|
||||||
limitMode,
|
limitMode,
|
||||||
|
cachedChildSelectors,
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => {
|
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[], isDOMMode?: boolean; }) => {
|
||||||
|
if (isDOMMode || data.isDOMMode) {
|
||||||
|
domHighlighterHandler(data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const now = performance.now();
|
const now = performance.now();
|
||||||
if (now - highlighterUpdateRef.current < 16) {
|
if (now - highlighterUpdateRef.current < 16) {
|
||||||
return;
|
return;
|
||||||
@@ -652,6 +653,20 @@ export const BrowserWindow = () => {
|
|||||||
};
|
};
|
||||||
}, [socket, highlighterHandler, onMouseMove, getList, listSelector]);
|
}, [socket, highlighterHandler, onMouseMove, getList, listSelector]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
document.addEventListener("mousemove", onMouseMove, false);
|
||||||
|
if (socket) {
|
||||||
|
socket.off("highlighter", highlighterHandler);
|
||||||
|
socket.on("highlighter", highlighterHandler);
|
||||||
|
}
|
||||||
|
return () => {
|
||||||
|
document.removeEventListener("mousemove", onMouseMove);
|
||||||
|
if (socket) {
|
||||||
|
socket.off("highlighter", highlighterHandler);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}, [socket, highlighterHandler, getList, listSelector]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (socket && listSelector) {
|
if (socket && listSelector) {
|
||||||
console.log('Syncing list selector with server:', listSelector);
|
console.log('Syncing list selector with server:', listSelector);
|
||||||
@@ -673,11 +688,205 @@ export const BrowserWindow = () => {
|
|||||||
selector: string;
|
selector: string;
|
||||||
elementInfo: ElementInfo | null;
|
elementInfo: ElementInfo | null;
|
||||||
childSelectors?: string[];
|
childSelectors?: string[];
|
||||||
|
groupInfo?: {
|
||||||
|
isGroupElement: boolean;
|
||||||
|
groupSize: number;
|
||||||
|
groupElements: HTMLElement[];
|
||||||
|
};
|
||||||
}) => {
|
}) => {
|
||||||
setShowAttributeModal(false);
|
setShowAttributeModal(false);
|
||||||
setSelectedElement(null);
|
setSelectedElement(null);
|
||||||
setAttributeOptions([]);
|
setAttributeOptions([]);
|
||||||
|
|
||||||
|
if (paginationMode && getList) {
|
||||||
|
if (
|
||||||
|
paginationType !== "" &&
|
||||||
|
paginationType !== "scrollDown" &&
|
||||||
|
paginationType !== "scrollUp" &&
|
||||||
|
paginationType !== "none"
|
||||||
|
) {
|
||||||
|
setPaginationSelector(highlighterData.selector);
|
||||||
|
notify(
|
||||||
|
`info`,
|
||||||
|
t(
|
||||||
|
"browser_window.attribute_modal.notifications.pagination_select_success"
|
||||||
|
)
|
||||||
|
);
|
||||||
|
addListStep(
|
||||||
|
listSelector!,
|
||||||
|
fields,
|
||||||
|
currentListId || 0,
|
||||||
|
currentListActionId || `list-${crypto.randomUUID()}`,
|
||||||
|
{ type: paginationType, selector: highlighterData.selector }
|
||||||
|
);
|
||||||
|
socket?.emit("setPaginationMode", { pagination: false });
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
getList === true &&
|
||||||
|
!listSelector &&
|
||||||
|
highlighterData.groupInfo?.isGroupElement
|
||||||
|
) {
|
||||||
|
let cleanedSelector = highlighterData.selector;
|
||||||
|
|
||||||
|
setListSelector(cleanedSelector);
|
||||||
|
notify(
|
||||||
|
`info`,
|
||||||
|
t(
|
||||||
|
"browser_window.attribute_modal.notifications.list_select_success",
|
||||||
|
{
|
||||||
|
count: highlighterData.groupInfo.groupSize,
|
||||||
|
}
|
||||||
|
) ||
|
||||||
|
`Selected group with ${highlighterData.groupInfo.groupSize} similar elements`
|
||||||
|
);
|
||||||
|
setCurrentListId(Date.now());
|
||||||
|
setFields({});
|
||||||
|
|
||||||
|
socket?.emit("setGetList", { getList: true });
|
||||||
|
socket?.emit("listSelector", { selector: cleanedSelector });
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (getList === true && listSelector && currentListId) {
|
||||||
|
const options = getAttributeOptions(
|
||||||
|
highlighterData.elementInfo?.tagName || "",
|
||||||
|
highlighterData.elementInfo
|
||||||
|
);
|
||||||
|
|
||||||
|
if (options.length === 1) {
|
||||||
|
const attribute = options[0].value;
|
||||||
|
let currentSelector = highlighterData.selector;
|
||||||
|
|
||||||
|
const data =
|
||||||
|
attribute === "href"
|
||||||
|
? highlighterData.elementInfo?.url || ""
|
||||||
|
: attribute === "src"
|
||||||
|
? highlighterData.elementInfo?.imageUrl || ""
|
||||||
|
: highlighterData.elementInfo?.innerText || "";
|
||||||
|
|
||||||
|
const newField: TextStep = {
|
||||||
|
id: Date.now(),
|
||||||
|
type: "text",
|
||||||
|
label: `Label ${Object.keys(fields).length + 1}`,
|
||||||
|
data: data,
|
||||||
|
selectorObj: {
|
||||||
|
selector: currentSelector,
|
||||||
|
tag: highlighterData.elementInfo?.tagName,
|
||||||
|
shadow: highlighterData.elementInfo?.isShadowRoot,
|
||||||
|
attribute,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const updatedFields = {
|
||||||
|
...fields,
|
||||||
|
[newField.id]: newField,
|
||||||
|
};
|
||||||
|
|
||||||
|
setFields(updatedFields);
|
||||||
|
|
||||||
|
if (listSelector) {
|
||||||
|
addListStep(
|
||||||
|
listSelector,
|
||||||
|
updatedFields,
|
||||||
|
currentListId,
|
||||||
|
currentListActionId || `list-${crypto.randomUUID()}`,
|
||||||
|
{ type: "", selector: paginationSelector }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
setAttributeOptions(options);
|
||||||
|
setSelectedElement({
|
||||||
|
selector: highlighterData.selector,
|
||||||
|
info: highlighterData.elementInfo,
|
||||||
|
});
|
||||||
|
setShowAttributeModal(true);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (getText === true) {
|
||||||
|
const options = getAttributeOptions(
|
||||||
|
highlighterData.elementInfo?.tagName || "",
|
||||||
|
highlighterData.elementInfo
|
||||||
|
);
|
||||||
|
|
||||||
|
if (options.length === 1) {
|
||||||
|
const attribute = options[0].value;
|
||||||
|
const data =
|
||||||
|
attribute === "href"
|
||||||
|
? highlighterData.elementInfo?.url || ""
|
||||||
|
: attribute === "src"
|
||||||
|
? highlighterData.elementInfo?.imageUrl || ""
|
||||||
|
: highlighterData.elementInfo?.innerText || "";
|
||||||
|
|
||||||
|
addTextStep(
|
||||||
|
"",
|
||||||
|
data,
|
||||||
|
{
|
||||||
|
selector: highlighterData.selector,
|
||||||
|
tag: highlighterData.elementInfo?.tagName,
|
||||||
|
shadow: highlighterData.elementInfo?.isShadowRoot,
|
||||||
|
attribute,
|
||||||
|
},
|
||||||
|
currentTextActionId || `text-${crypto.randomUUID()}`
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
setAttributeOptions(options);
|
||||||
|
setSelectedElement({
|
||||||
|
selector: highlighterData.selector,
|
||||||
|
info: highlighterData.elementInfo,
|
||||||
|
});
|
||||||
|
setShowAttributeModal(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[
|
||||||
|
getText,
|
||||||
|
getList,
|
||||||
|
listSelector,
|
||||||
|
paginationMode,
|
||||||
|
paginationType,
|
||||||
|
limitMode,
|
||||||
|
fields,
|
||||||
|
currentListId,
|
||||||
|
currentTextActionId,
|
||||||
|
currentListActionId,
|
||||||
|
addTextStep,
|
||||||
|
addListStep,
|
||||||
|
notify,
|
||||||
|
socket,
|
||||||
|
t,
|
||||||
|
paginationSelector,
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
||||||
|
if (highlighterData) {
|
||||||
|
let shouldProcessClick = false;
|
||||||
|
|
||||||
|
if (!isDOMMode && canvasRef?.current) {
|
||||||
|
const canvasRect = canvasRef.current.getBoundingClientRect();
|
||||||
|
const clickX = e.clientX - canvasRect.left;
|
||||||
|
const clickY = e.clientY - canvasRect.top;
|
||||||
|
const highlightRect = highlighterData.rect;
|
||||||
|
const mappedRect =
|
||||||
|
coordinateMapper.mapBrowserRectToCanvas(highlightRect);
|
||||||
|
|
||||||
|
shouldProcessClick =
|
||||||
|
clickX >= mappedRect.left &&
|
||||||
|
clickX <= mappedRect.right &&
|
||||||
|
clickY >= mappedRect.top &&
|
||||||
|
clickY <= mappedRect.bottom;
|
||||||
|
} else {
|
||||||
|
shouldProcessClick = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shouldProcessClick) {
|
||||||
const options = getAttributeOptions(
|
const options = getAttributeOptions(
|
||||||
highlighterData.elementInfo?.tagName || "",
|
highlighterData.elementInfo?.tagName || "",
|
||||||
highlighterData.elementInfo
|
highlighterData.elementInfo
|
||||||
@@ -742,47 +951,24 @@ export const BrowserWindow = () => {
|
|||||||
|
|
||||||
if (getList === true && !listSelector) {
|
if (getList === true && !listSelector) {
|
||||||
let cleanedSelector = highlighterData.selector;
|
let cleanedSelector = highlighterData.selector;
|
||||||
if (cleanedSelector.includes("nth-child")) {
|
if (
|
||||||
cleanedSelector = cleanedSelector.replace(/:nth-child\(\d+\)/g, "");
|
cleanedSelector.includes("[") &&
|
||||||
|
cleanedSelector.match(/\[\d+\]/)
|
||||||
|
) {
|
||||||
|
cleanedSelector = cleanedSelector.replace(/\[\d+\]/g, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
setListSelector(cleanedSelector);
|
setListSelector(cleanedSelector);
|
||||||
notify(
|
notify(
|
||||||
`info`,
|
`info`,
|
||||||
t("browser_window.attribute_modal.notifications.list_select_success")
|
t(
|
||||||
|
"browser_window.attribute_modal.notifications.list_select_success"
|
||||||
|
)
|
||||||
);
|
);
|
||||||
setCurrentListId(Date.now());
|
setCurrentListId(Date.now());
|
||||||
setFields({});
|
setFields({});
|
||||||
|
|
||||||
socket?.emit("setGetList", { getList: true });
|
|
||||||
socket?.emit("listSelector", { selector: cleanedSelector });
|
|
||||||
} else if (getList === true && listSelector && currentListId) {
|
} else if (getList === true && listSelector && currentListId) {
|
||||||
if (options.length === 1) {
|
|
||||||
const attribute = options[0].value;
|
const attribute = options[0].value;
|
||||||
let currentSelector = highlighterData.selector;
|
|
||||||
|
|
||||||
if (currentSelector.includes(">")) {
|
|
||||||
const [firstPart, ...restParts] = currentSelector
|
|
||||||
.split(">")
|
|
||||||
.map((p) => p.trim());
|
|
||||||
const listSelectorRightPart = listSelector
|
|
||||||
.split(">")
|
|
||||||
.pop()
|
|
||||||
?.trim()
|
|
||||||
.replace(/:nth-child\(\d+\)/g, "");
|
|
||||||
|
|
||||||
if (
|
|
||||||
firstPart.includes("nth-child") &&
|
|
||||||
firstPart.replace(/:nth-child\(\d+\)/g, "") ===
|
|
||||||
listSelectorRightPart
|
|
||||||
) {
|
|
||||||
currentSelector = `${firstPart.replace(
|
|
||||||
/:nth-child\(\d+\)/g,
|
|
||||||
""
|
|
||||||
)} > ${restParts.join(" > ")}`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const data =
|
const data =
|
||||||
attribute === "href"
|
attribute === "href"
|
||||||
? highlighterData.elementInfo?.url || ""
|
? highlighterData.elementInfo?.url || ""
|
||||||
@@ -790,6 +976,22 @@ export const BrowserWindow = () => {
|
|||||||
? highlighterData.elementInfo?.imageUrl || ""
|
? highlighterData.elementInfo?.imageUrl || ""
|
||||||
: highlighterData.elementInfo?.innerText || "";
|
: highlighterData.elementInfo?.innerText || "";
|
||||||
|
|
||||||
|
if (options.length === 1) {
|
||||||
|
let currentSelector = highlighterData.selector;
|
||||||
|
|
||||||
|
if (currentSelector.includes("/")) {
|
||||||
|
const xpathParts = currentSelector
|
||||||
|
.split("/")
|
||||||
|
.filter((part) => part);
|
||||||
|
const cleanedParts = xpathParts.map((part) => {
|
||||||
|
return part.replace(/\[\d+\]/g, "");
|
||||||
|
});
|
||||||
|
|
||||||
|
if (cleanedParts.length > 0) {
|
||||||
|
currentSelector = "//" + cleanedParts.join("/");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const newField: TextStep = {
|
const newField: TextStep = {
|
||||||
id: Date.now(),
|
id: Date.now(),
|
||||||
type: "text",
|
type: "text",
|
||||||
@@ -828,150 +1030,6 @@ export const BrowserWindow = () => {
|
|||||||
setShowAttributeModal(true);
|
setShowAttributeModal(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
|
||||||
[
|
|
||||||
getText,
|
|
||||||
getList,
|
|
||||||
listSelector,
|
|
||||||
paginationMode,
|
|
||||||
paginationType,
|
|
||||||
fields,
|
|
||||||
currentListId,
|
|
||||||
currentTextActionId,
|
|
||||||
currentListActionId,
|
|
||||||
addTextStep,
|
|
||||||
addListStep,
|
|
||||||
notify,
|
|
||||||
socket,
|
|
||||||
t,
|
|
||||||
paginationSelector,
|
|
||||||
]
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
|
||||||
if (highlighterData && canvasRef?.current) {
|
|
||||||
const canvasRect = canvasRef.current.getBoundingClientRect();
|
|
||||||
const clickX = e.clientX - canvasRect.left;
|
|
||||||
const clickY = e.clientY - canvasRect.top;
|
|
||||||
|
|
||||||
const highlightRect = highlighterData.rect;
|
|
||||||
|
|
||||||
const mappedRect = coordinateMapper.mapBrowserRectToCanvas(highlightRect);
|
|
||||||
if (
|
|
||||||
clickX >= mappedRect.left &&
|
|
||||||
clickX <= mappedRect.right &&
|
|
||||||
clickY >= mappedRect.top &&
|
|
||||||
clickY <= mappedRect.bottom
|
|
||||||
) {
|
|
||||||
|
|
||||||
const options = getAttributeOptions(highlighterData.elementInfo?.tagName || '', highlighterData.elementInfo);
|
|
||||||
|
|
||||||
if (getText === true) {
|
|
||||||
if (options.length === 1) {
|
|
||||||
// Directly use the available attribute if only one option is present
|
|
||||||
const attribute = options[0].value;
|
|
||||||
const data = attribute === 'href' ? highlighterData.elementInfo?.url || '' :
|
|
||||||
attribute === 'src' ? highlighterData.elementInfo?.imageUrl || '' :
|
|
||||||
highlighterData.elementInfo?.innerText || '';
|
|
||||||
|
|
||||||
addTextStep('', data, {
|
|
||||||
selector: highlighterData.selector,
|
|
||||||
tag: highlighterData.elementInfo?.tagName,
|
|
||||||
shadow: highlighterData.elementInfo?.isShadowRoot,
|
|
||||||
attribute,
|
|
||||||
}, currentTextActionId || `text-${crypto.randomUUID()}`);
|
|
||||||
} else {
|
|
||||||
// Show the modal if there are multiple options
|
|
||||||
setAttributeOptions(options);
|
|
||||||
setSelectedElement({
|
|
||||||
selector: highlighterData.selector,
|
|
||||||
info: highlighterData.elementInfo,
|
|
||||||
});
|
|
||||||
setShowAttributeModal(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (paginationMode && getList) {
|
|
||||||
// Only allow selection in pagination mode if type is not empty, 'scrollDown', or 'scrollUp'
|
|
||||||
if (paginationType !== '' && paginationType !== 'scrollDown' && paginationType !== 'scrollUp' && paginationType !== 'none') {
|
|
||||||
setPaginationSelector(highlighterData.selector);
|
|
||||||
notify(`info`, t('browser_window.attribute_modal.notifications.pagination_select_success'));
|
|
||||||
addListStep(listSelector!, fields, currentListId || 0, currentListActionId || `list-${crypto.randomUUID()}`, { type: paginationType, selector: highlighterData.selector });
|
|
||||||
socket?.emit('setPaginationMode', { pagination: false });
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (getList === true && !listSelector) {
|
|
||||||
let cleanedSelector = highlighterData.selector;
|
|
||||||
if (cleanedSelector.includes('nth-child')) {
|
|
||||||
cleanedSelector = cleanedSelector.replace(/:nth-child\(\d+\)/g, '');
|
|
||||||
}
|
|
||||||
|
|
||||||
setListSelector(cleanedSelector);
|
|
||||||
notify(`info`, t('browser_window.attribute_modal.notifications.list_select_success'));
|
|
||||||
setCurrentListId(Date.now());
|
|
||||||
setFields({});
|
|
||||||
} else if (getList === true && listSelector && currentListId) {
|
|
||||||
const attribute = options[0].value;
|
|
||||||
const data = attribute === 'href' ? highlighterData.elementInfo?.url || '' :
|
|
||||||
attribute === 'src' ? highlighterData.elementInfo?.imageUrl || '' :
|
|
||||||
highlighterData.elementInfo?.innerText || '';
|
|
||||||
// Add fields to the list
|
|
||||||
if (options.length === 1) {
|
|
||||||
const attribute = options[0].value;
|
|
||||||
let currentSelector = highlighterData.selector;
|
|
||||||
|
|
||||||
if (currentSelector.includes('>')) {
|
|
||||||
const [firstPart, ...restParts] = currentSelector.split('>').map(p => p.trim());
|
|
||||||
const listSelectorRightPart = listSelector.split('>').pop()?.trim().replace(/:nth-child\(\d+\)/g, '');
|
|
||||||
|
|
||||||
if (firstPart.includes('nth-child') &&
|
|
||||||
firstPart.replace(/:nth-child\(\d+\)/g, '') === listSelectorRightPart) {
|
|
||||||
currentSelector = `${firstPart.replace(/:nth-child\(\d+\)/g, '')} > ${restParts.join(' > ')}`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const newField: TextStep = {
|
|
||||||
id: Date.now(),
|
|
||||||
type: 'text',
|
|
||||||
label: `Label ${Object.keys(fields).length + 1}`,
|
|
||||||
data: data,
|
|
||||||
selectorObj: {
|
|
||||||
selector: currentSelector,
|
|
||||||
tag: highlighterData.elementInfo?.tagName,
|
|
||||||
shadow: highlighterData.elementInfo?.isShadowRoot,
|
|
||||||
attribute
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const updatedFields = {
|
|
||||||
...fields,
|
|
||||||
[newField.id]: newField
|
|
||||||
};
|
|
||||||
|
|
||||||
setFields(updatedFields);
|
|
||||||
|
|
||||||
if (listSelector) {
|
|
||||||
addListStep(
|
|
||||||
listSelector,
|
|
||||||
updatedFields,
|
|
||||||
currentListId,
|
|
||||||
currentListActionId || `list-${crypto.randomUUID()}`,
|
|
||||||
{ type: '', selector: paginationSelector }
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
setAttributeOptions(options);
|
|
||||||
setSelectedElement({
|
|
||||||
selector: highlighterData.selector,
|
|
||||||
info: highlighterData.elementInfo
|
|
||||||
});
|
|
||||||
setShowAttributeModal(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -1150,6 +1208,10 @@ export const BrowserWindow = () => {
|
|||||||
|
|
||||||
{isDOMMode && highlighterData && (
|
{isDOMMode && highlighterData && (
|
||||||
<>
|
<>
|
||||||
|
{/* Individual element highlight (for non-group or hovered element) */}
|
||||||
|
{(!getList ||
|
||||||
|
listSelector ||
|
||||||
|
!currentGroupInfo?.isGroupElement) && (
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
position: "absolute",
|
position: "absolute",
|
||||||
@@ -1172,6 +1234,59 @@ export const BrowserWindow = () => {
|
|||||||
transition: "all 0.1s ease-out",
|
transition: "all 0.1s ease-out",
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Group elements highlighting with real-time coordinates */}
|
||||||
|
{getList &&
|
||||||
|
!listSelector &&
|
||||||
|
currentGroupInfo?.isGroupElement &&
|
||||||
|
highlighterData.groupElements &&
|
||||||
|
highlighterData.groupElements.map((groupElement, index) => (
|
||||||
|
<React.Fragment key={index}>
|
||||||
|
{/* Highlight box */}
|
||||||
|
<div
|
||||||
|
style={{
|
||||||
|
position: "absolute",
|
||||||
|
left: Math.max(0, groupElement.rect.x),
|
||||||
|
top: Math.max(0, groupElement.rect.y),
|
||||||
|
width: Math.min(
|
||||||
|
groupElement.rect.width,
|
||||||
|
dimensions.width
|
||||||
|
),
|
||||||
|
height: Math.min(
|
||||||
|
groupElement.rect.height,
|
||||||
|
dimensions.height
|
||||||
|
),
|
||||||
|
background: "rgba(255, 0, 195, 0.15)",
|
||||||
|
border: "2px dashed #ff00c3",
|
||||||
|
borderRadius: "3px",
|
||||||
|
pointerEvents: "none",
|
||||||
|
zIndex: 1000,
|
||||||
|
boxShadow: "0 0 0 1px rgba(255, 255, 255, 0.8)",
|
||||||
|
transition: "all 0.1s ease-out",
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<div
|
||||||
|
style={{
|
||||||
|
position: "absolute",
|
||||||
|
left: Math.max(0, groupElement.rect.x),
|
||||||
|
top: Math.max(0, groupElement.rect.y - 20),
|
||||||
|
background: "#ff00c3",
|
||||||
|
color: "white",
|
||||||
|
padding: "2px 6px",
|
||||||
|
fontSize: "10px",
|
||||||
|
fontWeight: "bold",
|
||||||
|
borderRadius: "2px",
|
||||||
|
pointerEvents: "none",
|
||||||
|
zIndex: 1001,
|
||||||
|
whiteSpace: "nowrap",
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
List item {index + 1}
|
||||||
|
</div>
|
||||||
|
</React.Fragment>
|
||||||
|
))}
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</>
|
</>
|
||||||
@@ -1186,6 +1301,7 @@ export const BrowserWindow = () => {
|
|||||||
getList={getList}
|
getList={getList}
|
||||||
getText={getText}
|
getText={getText}
|
||||||
listSelector={listSelector}
|
listSelector={listSelector}
|
||||||
|
cachedChildSelectors={cachedChildSelectors}
|
||||||
paginationMode={paginationMode}
|
paginationMode={paginationMode}
|
||||||
paginationType={paginationType}
|
paginationType={paginationType}
|
||||||
limitMode={limitMode}
|
limitMode={limitMode}
|
||||||
|
|||||||
@@ -98,6 +98,7 @@ interface RRWebDOMBrowserRendererProps {
|
|||||||
getList?: boolean;
|
getList?: boolean;
|
||||||
getText?: boolean;
|
getText?: boolean;
|
||||||
listSelector?: string | null;
|
listSelector?: string | null;
|
||||||
|
cachedChildSelectors?: string[];
|
||||||
paginationMode?: boolean;
|
paginationMode?: boolean;
|
||||||
paginationType?: string;
|
paginationType?: string;
|
||||||
limitMode?: boolean;
|
limitMode?: boolean;
|
||||||
@@ -106,12 +107,14 @@ interface RRWebDOMBrowserRendererProps {
|
|||||||
selector: string;
|
selector: string;
|
||||||
elementInfo: ElementInfo | null;
|
elementInfo: ElementInfo | null;
|
||||||
childSelectors?: string[];
|
childSelectors?: string[];
|
||||||
|
groupInfo?: any;
|
||||||
}) => void;
|
}) => void;
|
||||||
onElementSelect?: (data: {
|
onElementSelect?: (data: {
|
||||||
rect: DOMRect;
|
rect: DOMRect;
|
||||||
selector: string;
|
selector: string;
|
||||||
elementInfo: ElementInfo | null;
|
elementInfo: ElementInfo | null;
|
||||||
childSelectors?: string[];
|
childSelectors?: string[];
|
||||||
|
groupInfo?: any;
|
||||||
}) => void;
|
}) => void;
|
||||||
onShowDatePicker?: (info: {
|
onShowDatePicker?: (info: {
|
||||||
coordinates: { x: number; y: number };
|
coordinates: { x: number; y: number };
|
||||||
@@ -144,6 +147,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
getList = false,
|
getList = false,
|
||||||
getText = false,
|
getText = false,
|
||||||
listSelector = null,
|
listSelector = null,
|
||||||
|
cachedChildSelectors = [],
|
||||||
paginationMode = false,
|
paginationMode = false,
|
||||||
paginationType = "",
|
paginationType = "",
|
||||||
limitMode = false,
|
limitMode = false,
|
||||||
@@ -205,11 +209,24 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
const handleDOMHighlighting = useCallback(
|
const handleDOMHighlighting = useCallback(
|
||||||
(x: number, y: number, iframeDoc: Document) => {
|
(x: number, y: number, iframeDoc: Document) => {
|
||||||
try {
|
try {
|
||||||
|
if (!getText && !getList) {
|
||||||
|
setCurrentHighlight(null);
|
||||||
|
if (onHighlight) {
|
||||||
|
onHighlight({
|
||||||
|
rect: new DOMRect(0, 0, 0, 0),
|
||||||
|
selector: "",
|
||||||
|
elementInfo: null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const highlighterData =
|
const highlighterData =
|
||||||
clientSelectorGenerator.generateDataForHighlighter(
|
clientSelectorGenerator.generateDataForHighlighter(
|
||||||
{ x, y },
|
{ x, y },
|
||||||
iframeDoc,
|
iframeDoc,
|
||||||
true
|
true,
|
||||||
|
cachedChildSelectors
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!highlighterData) {
|
if (!highlighterData) {
|
||||||
@@ -224,70 +241,40 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const { rect, selector, elementInfo, childSelectors } = highlighterData;
|
const { rect, selector, elementInfo, childSelectors, groupInfo } =
|
||||||
|
highlighterData;
|
||||||
|
|
||||||
let shouldHighlight = false;
|
let shouldHighlight = false;
|
||||||
|
|
||||||
if (getList) {
|
if (getList) {
|
||||||
if (listSelector) {
|
// First phase: Allow any group to be highlighted for selection
|
||||||
const hasValidChildSelectors =
|
if (!listSelector && groupInfo?.isGroupElement) {
|
||||||
Array.isArray(childSelectors) && childSelectors.length > 0;
|
shouldHighlight = true;
|
||||||
|
}
|
||||||
|
// Second phase: Show valid children within selected group
|
||||||
|
else if (listSelector) {
|
||||||
if (limitMode) {
|
if (limitMode) {
|
||||||
shouldHighlight = false;
|
shouldHighlight = false;
|
||||||
} else if (paginationMode) {
|
} else if (
|
||||||
if (
|
paginationMode &&
|
||||||
paginationType !== "" &&
|
paginationType !== "" &&
|
||||||
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
||||||
) {
|
) {
|
||||||
shouldHighlight = true;
|
shouldHighlight = true;
|
||||||
} else {
|
} else if (childSelectors && childSelectors.length > 0) {
|
||||||
shouldHighlight = false;
|
console.log("✅ Child selectors present, highlighting enabled");
|
||||||
}
|
|
||||||
} else if (childSelectors && childSelectors.includes(selector)) {
|
|
||||||
shouldHighlight = true;
|
shouldHighlight = true;
|
||||||
} else if (elementInfo?.isIframeContent && childSelectors) {
|
|
||||||
const isIframeChild = childSelectors.some(
|
|
||||||
(childSelector: string) =>
|
|
||||||
selector.includes(":>>") &&
|
|
||||||
childSelector
|
|
||||||
.split(":>>")
|
|
||||||
.some((part) => selector.includes(part.trim()))
|
|
||||||
);
|
|
||||||
shouldHighlight = isIframeChild;
|
|
||||||
} else if (selector.includes(":>>") && hasValidChildSelectors) {
|
|
||||||
const selectorParts = selector
|
|
||||||
.split(":>>")
|
|
||||||
.map((part: string) => part.trim());
|
|
||||||
const isValidMixedSelector = selectorParts.some((part: any) =>
|
|
||||||
childSelectors!.some((childSelector) =>
|
|
||||||
childSelector.includes(part)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
} else if (elementInfo?.isShadowRoot && childSelectors) {
|
|
||||||
const isShadowChild = childSelectors.some(
|
|
||||||
(childSelector: string) =>
|
|
||||||
selector.includes(">>") &&
|
|
||||||
childSelector
|
|
||||||
.split(">>")
|
|
||||||
.some((part) => selector.includes(part.trim()))
|
|
||||||
);
|
|
||||||
} else if (selector.includes(">>") && hasValidChildSelectors) {
|
|
||||||
const selectorParts = selector
|
|
||||||
.split(">>")
|
|
||||||
.map((part: string) => part.trim());
|
|
||||||
const isValidMixedSelector = selectorParts.some((part: any) =>
|
|
||||||
childSelectors!.some((childSelector) =>
|
|
||||||
childSelector.includes(part)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
|
console.log("❌ No child selectors available");
|
||||||
shouldHighlight = false;
|
shouldHighlight = false;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
// No list selector - show regular highlighting
|
||||||
|
else {
|
||||||
shouldHighlight = true;
|
shouldHighlight = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// getText mode - always highlight
|
||||||
shouldHighlight = true;
|
shouldHighlight = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -316,6 +303,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
},
|
},
|
||||||
selector,
|
selector,
|
||||||
childSelectors,
|
childSelectors,
|
||||||
|
groupInfo,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -335,9 +323,11 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
[
|
[
|
||||||
|
getText,
|
||||||
getList,
|
getList,
|
||||||
listSelector,
|
listSelector,
|
||||||
paginationMode,
|
paginationMode,
|
||||||
|
cachedChildSelectors,
|
||||||
paginationType,
|
paginationType,
|
||||||
limitMode,
|
limitMode,
|
||||||
onHighlight,
|
onHighlight,
|
||||||
@@ -363,6 +353,10 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!isInCaptureMode) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const now = performance.now();
|
const now = performance.now();
|
||||||
if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) {
|
if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) {
|
||||||
return;
|
return;
|
||||||
@@ -401,11 +395,24 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
e.stopPropagation();
|
e.stopPropagation();
|
||||||
|
|
||||||
if (currentHighlight && onElementSelect) {
|
if (currentHighlight && onElementSelect) {
|
||||||
|
// Get the group info for the current highlight
|
||||||
|
const highlighterData =
|
||||||
|
clientSelectorGenerator.generateDataForHighlighter(
|
||||||
|
{ x: iframeX, y: iframeY },
|
||||||
|
iframeDoc,
|
||||||
|
true,
|
||||||
|
cachedChildSelectors
|
||||||
|
);
|
||||||
|
|
||||||
onElementSelect({
|
onElementSelect({
|
||||||
rect: currentHighlight.rect,
|
rect: currentHighlight.rect,
|
||||||
selector: currentHighlight.selector,
|
selector: currentHighlight.selector,
|
||||||
elementInfo: currentHighlight.elementInfo,
|
elementInfo: currentHighlight.elementInfo,
|
||||||
childSelectors: currentHighlight.childSelectors || [],
|
childSelectors:
|
||||||
|
cachedChildSelectors.length > 0
|
||||||
|
? cachedChildSelectors
|
||||||
|
: highlighterData?.childSelectors || [],
|
||||||
|
groupInfo: highlighterData?.groupInfo,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
notifyLastAction("select element");
|
notifyLastAction("select element");
|
||||||
@@ -790,11 +797,40 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
|
|
||||||
rebuiltHTML = "<!DOCTYPE html>\n" + rebuiltHTML;
|
rebuiltHTML = "<!DOCTYPE html>\n" + rebuiltHTML;
|
||||||
|
|
||||||
|
const additionalCSS = [];
|
||||||
|
|
||||||
|
if (snapshotData.resources.fonts?.length > 0) {
|
||||||
|
const fontCSS = snapshotData.resources.fonts
|
||||||
|
.map((font) => {
|
||||||
|
const format = font.format || "woff2";
|
||||||
|
return `
|
||||||
|
@font-face {
|
||||||
|
font-family: 'ProxiedFont-${
|
||||||
|
font.url.split("/").pop()?.split(".")[0] || "unknown"
|
||||||
|
}';
|
||||||
|
src: url("${font.dataUrl}") format("${format}");
|
||||||
|
font-display: swap;
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
})
|
||||||
|
.join("\n");
|
||||||
|
additionalCSS.push(fontCSS);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (snapshotData.resources.stylesheets?.length > 0) {
|
||||||
|
const externalCSS = snapshotData.resources.stylesheets
|
||||||
|
.map((stylesheet) => stylesheet.content)
|
||||||
|
.join("\n\n");
|
||||||
|
additionalCSS.push(externalCSS);
|
||||||
|
}
|
||||||
|
|
||||||
const enhancedCSS = `
|
const enhancedCSS = `
|
||||||
/* rrweb rebuilt content styles */
|
/* rrweb rebuilt content styles */
|
||||||
html, body {
|
html, body {
|
||||||
margin: 0 !important;
|
margin: 0 !important;
|
||||||
padding: 8px !important;
|
padding: 8px !important;
|
||||||
|
font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
|
||||||
|
background: white !important;
|
||||||
overflow-x: hidden !important;
|
overflow-x: hidden !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -819,12 +855,22 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
-ms-overflow-style: none !important; /* Internet Explorer 10+ */
|
-ms-overflow-style: none !important; /* Internet Explorer 10+ */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
img {
|
||||||
|
max-width: 100% !important;
|
||||||
|
height: auto !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Make everything interactive */
|
/* Make everything interactive */
|
||||||
* {
|
* {
|
||||||
cursor: "pointer" !important;
|
cursor: "pointer" !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Additional CSS from resources */
|
||||||
|
${additionalCSS.join("\n\n")}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
|
|
||||||
const headTagRegex = /<head[^>]*>/i;
|
const headTagRegex = /<head[^>]*>/i;
|
||||||
const cssInjection = `
|
const cssInjection = `
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import { useThemeMode } from '../../context/theme-provider';
|
|||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
|
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
|
||||||
import { clientListExtractor } from '../../helpers/clientListExtractor';
|
import { clientListExtractor } from '../../helpers/clientListExtractor';
|
||||||
|
import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator';
|
||||||
|
|
||||||
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
|
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
|
||||||
getActiveWorkflow(id).then(
|
getActiveWorkflow(id).then(
|
||||||
@@ -52,10 +53,8 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false);
|
const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false);
|
||||||
const [isCaptureListConfirmed, setIsCaptureListConfirmed] = useState(false);
|
const [isCaptureListConfirmed, setIsCaptureListConfirmed] = useState(false);
|
||||||
const { panelHeight } = useBrowserDimensionsStore();
|
const { panelHeight } = useBrowserDimensionsStore();
|
||||||
const [isDOMMode, setIsDOMMode] = useState(false);
|
|
||||||
const [currentSnapshot, setCurrentSnapshot] = useState<any>(null);
|
|
||||||
|
|
||||||
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId } = useGlobalInfoStore();
|
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, updateDOMMode, currentSnapshot, isDOMMode } = useGlobalInfoStore();
|
||||||
const {
|
const {
|
||||||
getText, startGetText, stopGetText,
|
getText, startGetText, stopGetText,
|
||||||
getList, startGetList, stopGetList,
|
getList, startGetList, stopGetList,
|
||||||
@@ -86,22 +85,20 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
if (socket) {
|
if (socket) {
|
||||||
const domModeHandler = (data: any) => {
|
const domModeHandler = (data: any) => {
|
||||||
if (!data.userId || data.userId === id) {
|
if (!data.userId || data.userId === id) {
|
||||||
setIsDOMMode(true);
|
updateDOMMode(true);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const screenshotModeHandler = (data: any) => {
|
const screenshotModeHandler = (data: any) => {
|
||||||
if (!data.userId || data.userId === id) {
|
if (!data.userId || data.userId === id) {
|
||||||
setIsDOMMode(false);
|
updateDOMMode(false);
|
||||||
setCurrentSnapshot(null);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const domcastHandler = (data: any) => {
|
const domcastHandler = (data: any) => {
|
||||||
if (!data.userId || data.userId === id) {
|
if (!data.userId || data.userId === id) {
|
||||||
if (data.snapshotData && data.snapshotData.snapshot) {
|
if (data.snapshotData && data.snapshotData.snapshot) {
|
||||||
setCurrentSnapshot(data.snapshotData);
|
updateDOMMode(true, data.snapshotData);
|
||||||
setIsDOMMode(true);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -116,7 +113,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
socket.off("domcast", domcastHandler);
|
socket.off("domcast", domcastHandler);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}, [socket, id]);
|
}, [socket, id, updateDOMMode]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (socket) {
|
if (socket) {
|
||||||
@@ -214,7 +211,6 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
) => {
|
) => {
|
||||||
if (isDOMMode && currentSnapshot) {
|
if (isDOMMode && currentSnapshot) {
|
||||||
try {
|
try {
|
||||||
// Find the DOM iframe element
|
|
||||||
let iframeElement = document.querySelector(
|
let iframeElement = document.querySelector(
|
||||||
"#dom-browser-iframe"
|
"#dom-browser-iframe"
|
||||||
) as HTMLIFrameElement;
|
) as HTMLIFrameElement;
|
||||||
@@ -247,22 +243,42 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use client-side extraction
|
Object.entries(fields).forEach(([key, field]) => {
|
||||||
|
if (field.selectorObj?.selector) {
|
||||||
|
const isFieldXPath =
|
||||||
|
field.selectorObj.selector.startsWith("//") ||
|
||||||
|
field.selectorObj.selector.startsWith("/");
|
||||||
|
console.log(
|
||||||
|
`Field "${key}" selector:`,
|
||||||
|
field.selectorObj.selector,
|
||||||
|
`(XPath: ${isFieldXPath})`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
const extractedData = clientListExtractor.extractListData(
|
const extractedData = clientListExtractor.extractListData(
|
||||||
iframeDoc,
|
iframeDoc,
|
||||||
listSelector,
|
listSelector,
|
||||||
fields,
|
fields,
|
||||||
5 // limit for preview
|
5
|
||||||
);
|
);
|
||||||
|
|
||||||
updateListStepData(currentListId, extractedData);
|
updateListStepData(currentListId, extractedData);
|
||||||
console.log("✅ UI extraction completed:");
|
|
||||||
|
if (extractedData.length === 0) {
|
||||||
|
console.warn(
|
||||||
|
"⚠️ No data extracted - this might indicate selector issues"
|
||||||
|
);
|
||||||
|
notify(
|
||||||
|
"warning",
|
||||||
|
"No data was extracted. Please verify your selections."
|
||||||
|
);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error in client-side data extraction:", error);
|
console.error("Error in client-side data extraction:", error);
|
||||||
notify("error", "Failed to extract data client-side");
|
notify("error", "Failed to extract data client-side");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Fallback to socket-based extraction for screenshot mode
|
|
||||||
if (!socket) {
|
if (!socket) {
|
||||||
console.error("Socket not available for backend extraction");
|
console.error("Socket not available for backend extraction");
|
||||||
return;
|
return;
|
||||||
@@ -275,8 +291,6 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
currentListId,
|
currentListId,
|
||||||
pagination: { type: "", selector: "" },
|
pagination: { type: "", selector: "" },
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log("📤 Sent extraction request to server");
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error in backend data extraction:", error);
|
console.error("Error in backend data extraction:", error);
|
||||||
}
|
}
|
||||||
@@ -443,6 +457,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
resetInterpretationLog();
|
resetInterpretationLog();
|
||||||
finishAction('text');
|
finishAction('text');
|
||||||
onFinishCapture();
|
onFinishCapture();
|
||||||
|
clientSelectorGenerator.cleanup();
|
||||||
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog, finishAction, notify, onFinishCapture, t]);
|
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog, finishAction, notify, onFinishCapture, t]);
|
||||||
|
|
||||||
const getListSettingsObject = useCallback(() => {
|
const getListSettingsObject = useCallback(() => {
|
||||||
@@ -495,6 +510,8 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
const stopCaptureAndEmitGetListSettings = useCallback(() => {
|
const stopCaptureAndEmitGetListSettings = useCallback(() => {
|
||||||
const settings = getListSettingsObject();
|
const settings = getListSettingsObject();
|
||||||
|
|
||||||
|
console.log("rrwebSnapshotHandler", settings);
|
||||||
|
|
||||||
const latestListStep = getLatestListStep(browserSteps);
|
const latestListStep = getLatestListStep(browserSteps);
|
||||||
if (latestListStep && settings) {
|
if (latestListStep && settings) {
|
||||||
extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id);
|
extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id);
|
||||||
@@ -509,6 +526,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
resetInterpretationLog();
|
resetInterpretationLog();
|
||||||
finishAction('list');
|
finishAction('list');
|
||||||
onFinishCapture();
|
onFinishCapture();
|
||||||
|
clientSelectorGenerator.cleanup();
|
||||||
}, [getListSettingsObject, socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide]);
|
}, [getListSettingsObject, socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide]);
|
||||||
|
|
||||||
const hasUnconfirmedListTextFields = browserSteps.some(step =>
|
const hasUnconfirmedListTextFields = browserSteps.some(step =>
|
||||||
@@ -638,6 +656,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
|
|
||||||
setCurrentTextActionId('');
|
setCurrentTextActionId('');
|
||||||
setIsCaptureTextConfirmed(false);
|
setIsCaptureTextConfirmed(false);
|
||||||
|
clientSelectorGenerator.cleanup();
|
||||||
notify('error', t('right_panel.errors.capture_text_discarded'));
|
notify('error', t('right_panel.errors.capture_text_discarded'));
|
||||||
}, [currentTextActionId, browserSteps, stopGetText, deleteStepsByActionId, notify, t]);
|
}, [currentTextActionId, browserSteps, stopGetText, deleteStepsByActionId, notify, t]);
|
||||||
|
|
||||||
@@ -668,6 +687,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
setCaptureStage('initial');
|
setCaptureStage('initial');
|
||||||
setCurrentListActionId('');
|
setCurrentListActionId('');
|
||||||
setIsCaptureListConfirmed(false);
|
setIsCaptureListConfirmed(false);
|
||||||
|
clientSelectorGenerator.cleanup();
|
||||||
notify('error', t('right_panel.errors.capture_list_discarded'));
|
notify('error', t('right_panel.errors.capture_list_discarded'));
|
||||||
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t]);
|
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t]);
|
||||||
|
|
||||||
@@ -686,6 +706,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
stopGetScreenshot();
|
stopGetScreenshot();
|
||||||
resetInterpretationLog();
|
resetInterpretationLog();
|
||||||
finishAction('screenshot');
|
finishAction('screenshot');
|
||||||
|
clientSelectorGenerator.cleanup();
|
||||||
onFinishCapture();
|
onFinishCapture();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,41 @@ interface ScheduleConfig {
|
|||||||
cronExpression?: string;
|
cronExpression?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface ProcessedSnapshot {
|
||||||
|
snapshot: any;
|
||||||
|
resources: {
|
||||||
|
stylesheets: Array<{
|
||||||
|
href: string;
|
||||||
|
content: string;
|
||||||
|
media?: string;
|
||||||
|
}>;
|
||||||
|
images: Array<{
|
||||||
|
src: string;
|
||||||
|
dataUrl: string;
|
||||||
|
alt?: string;
|
||||||
|
}>;
|
||||||
|
fonts: Array<{
|
||||||
|
url: string;
|
||||||
|
dataUrl: string;
|
||||||
|
format?: string;
|
||||||
|
}>;
|
||||||
|
scripts: Array<{
|
||||||
|
src: string;
|
||||||
|
content: string;
|
||||||
|
type?: string;
|
||||||
|
}>;
|
||||||
|
media: Array<{
|
||||||
|
src: string;
|
||||||
|
dataUrl: string;
|
||||||
|
type: string;
|
||||||
|
}>;
|
||||||
|
};
|
||||||
|
baseUrl: string;
|
||||||
|
viewport: { width: number; height: number };
|
||||||
|
timestamp: number;
|
||||||
|
processingStats: any;
|
||||||
|
}
|
||||||
|
|
||||||
export interface RobotSettings {
|
export interface RobotSettings {
|
||||||
id: string;
|
id: string;
|
||||||
userId?: number;
|
userId?: number;
|
||||||
@@ -86,6 +121,11 @@ interface GlobalInfo {
|
|||||||
setCurrentListActionId: (actionId: string) => void;
|
setCurrentListActionId: (actionId: string) => void;
|
||||||
currentScreenshotActionId: string;
|
currentScreenshotActionId: string;
|
||||||
setCurrentScreenshotActionId: (actionId: string) => void;
|
setCurrentScreenshotActionId: (actionId: string) => void;
|
||||||
|
isDOMMode: boolean;
|
||||||
|
setIsDOMMode: (isDOMMode: boolean) => void;
|
||||||
|
currentSnapshot: ProcessedSnapshot | null;
|
||||||
|
setCurrentSnapshot: (snapshot: ProcessedSnapshot | null) => void;
|
||||||
|
updateDOMMode: (isDOMMode: boolean, snapshot?: ProcessedSnapshot | null) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GlobalInfoStore implements Partial<GlobalInfo> {
|
class GlobalInfoStore implements Partial<GlobalInfo> {
|
||||||
@@ -115,6 +155,8 @@ class GlobalInfoStore implements Partial<GlobalInfo> {
|
|||||||
currentTextActionId = '';
|
currentTextActionId = '';
|
||||||
currentListActionId = '';
|
currentListActionId = '';
|
||||||
currentScreenshotActionId = '';
|
currentScreenshotActionId = '';
|
||||||
|
isDOMMode = false;
|
||||||
|
currentSnapshot = null;
|
||||||
};
|
};
|
||||||
|
|
||||||
const globalInfoStore = new GlobalInfoStore();
|
const globalInfoStore = new GlobalInfoStore();
|
||||||
@@ -141,6 +183,8 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => {
|
|||||||
const [currentTextActionId, setCurrentTextActionId] = useState<string>('');
|
const [currentTextActionId, setCurrentTextActionId] = useState<string>('');
|
||||||
const [currentListActionId, setCurrentListActionId] = useState<string>('');
|
const [currentListActionId, setCurrentListActionId] = useState<string>('');
|
||||||
const [currentScreenshotActionId, setCurrentScreenshotActionId] = useState<string>('');
|
const [currentScreenshotActionId, setCurrentScreenshotActionId] = useState<string>('');
|
||||||
|
const [isDOMMode, setIsDOMMode] = useState<boolean>(globalInfoStore.isDOMMode);
|
||||||
|
const [currentSnapshot, setCurrentSnapshot] = useState<ProcessedSnapshot | null>(globalInfoStore.currentSnapshot);
|
||||||
|
|
||||||
const notify = (severity: 'error' | 'warning' | 'info' | 'success', message: string) => {
|
const notify = (severity: 'error' | 'warning' | 'info' | 'success', message: string) => {
|
||||||
setNotification({ severity, message, isOpen: true });
|
setNotification({ severity, message, isOpen: true });
|
||||||
@@ -165,6 +209,18 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => {
|
|||||||
}, 100);
|
}, 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const updateDOMMode = (mode: boolean, snapshot?: ProcessedSnapshot | null) => {
|
||||||
|
setIsDOMMode(mode);
|
||||||
|
|
||||||
|
if (snapshot !== undefined) {
|
||||||
|
setCurrentSnapshot(snapshot);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!mode) {
|
||||||
|
setCurrentSnapshot(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<globalInfoContext.Provider
|
<globalInfoContext.Provider
|
||||||
value={{
|
value={{
|
||||||
@@ -205,6 +261,11 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => {
|
|||||||
setCurrentListActionId,
|
setCurrentListActionId,
|
||||||
currentScreenshotActionId,
|
currentScreenshotActionId,
|
||||||
setCurrentScreenshotActionId,
|
setCurrentScreenshotActionId,
|
||||||
|
isDOMMode,
|
||||||
|
setIsDOMMode,
|
||||||
|
currentSnapshot,
|
||||||
|
setCurrentSnapshot,
|
||||||
|
updateDOMMode,
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{children}
|
{children}
|
||||||
|
|||||||
@@ -15,31 +15,90 @@ interface ExtractedListData {
|
|||||||
[key: string]: string;
|
[key: string]: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface TableField {
|
interface Field {
|
||||||
selector: string;
|
selector: string;
|
||||||
attribute: string;
|
attribute: string;
|
||||||
tableContext?: string;
|
|
||||||
cellIndex?: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface NonTableField {
|
|
||||||
selector: string;
|
|
||||||
attribute: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface ContainerFields {
|
|
||||||
tableFields: Record<string, TableField>;
|
|
||||||
nonTableFields: Record<string, NonTableField>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class ClientListExtractor {
|
class ClientListExtractor {
|
||||||
|
private evaluateXPath = (
|
||||||
|
rootElement: Element | Document,
|
||||||
|
xpath: string
|
||||||
|
): Element | null => {
|
||||||
|
try {
|
||||||
|
const ownerDoc =
|
||||||
|
rootElement.nodeType === Node.DOCUMENT_NODE
|
||||||
|
? (rootElement as Document)
|
||||||
|
: rootElement.ownerDocument;
|
||||||
|
|
||||||
|
if (!ownerDoc) return null;
|
||||||
|
|
||||||
|
const result = ownerDoc.evaluate(
|
||||||
|
xpath,
|
||||||
|
rootElement,
|
||||||
|
null,
|
||||||
|
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
return result.singleNodeValue as Element | null;
|
||||||
|
} catch (error) {
|
||||||
|
console.warn("XPath evaluation failed:", xpath, error);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
private evaluateXPathAll = (
|
||||||
|
rootElement: Element | Document,
|
||||||
|
xpath: string
|
||||||
|
): Element[] => {
|
||||||
|
try {
|
||||||
|
const ownerDoc =
|
||||||
|
rootElement.nodeType === Node.DOCUMENT_NODE
|
||||||
|
? (rootElement as Document)
|
||||||
|
: rootElement.ownerDocument;
|
||||||
|
|
||||||
|
if (!ownerDoc) return [];
|
||||||
|
|
||||||
|
const result = ownerDoc.evaluate(
|
||||||
|
xpath,
|
||||||
|
rootElement,
|
||||||
|
null,
|
||||||
|
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
const elements: Element[] = [];
|
||||||
|
for (let i = 0; i < result.snapshotLength; i++) {
|
||||||
|
const node = result.snapshotItem(i);
|
||||||
|
if (node && node.nodeType === Node.ELEMENT_NODE) {
|
||||||
|
elements.push(node as Element);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return elements;
|
||||||
|
} catch (error) {
|
||||||
|
console.warn("XPath evaluation failed:", xpath, error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
private queryElement = (
|
private queryElement = (
|
||||||
rootElement: Element | Document,
|
rootElement: Element | Document,
|
||||||
selector: string
|
selector: string
|
||||||
): Element | null => {
|
): Element | null => {
|
||||||
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
||||||
|
// Check if it's an XPath selector (starts with // or / or ./)
|
||||||
|
if (
|
||||||
|
selector.startsWith("//") ||
|
||||||
|
selector.startsWith("/") ||
|
||||||
|
selector.startsWith("./")
|
||||||
|
) {
|
||||||
|
return this.evaluateXPath(rootElement, selector);
|
||||||
|
} else {
|
||||||
return rootElement.querySelector(selector);
|
return rootElement.querySelector(selector);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
||||||
let currentElement: Element | Document | null = rootElement;
|
let currentElement: Element | Document | null = rootElement;
|
||||||
@@ -59,7 +118,17 @@ class ClientListExtractor {
|
|||||||
frameElement.contentDocument ||
|
frameElement.contentDocument ||
|
||||||
frameElement.contentWindow?.document;
|
frameElement.contentWindow?.document;
|
||||||
if (!frameDoc) return null;
|
if (!frameDoc) return null;
|
||||||
|
|
||||||
|
// Handle XPath in iframe context
|
||||||
|
if (
|
||||||
|
parts[i].startsWith("//") ||
|
||||||
|
parts[i].startsWith("/") ||
|
||||||
|
parts[i].startsWith("./")
|
||||||
|
) {
|
||||||
|
currentElement = this.evaluateXPath(frameDoc, parts[i]);
|
||||||
|
} else {
|
||||||
currentElement = frameDoc.querySelector(parts[i]);
|
currentElement = frameDoc.querySelector(parts[i]);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn(
|
console.warn(
|
||||||
@@ -75,18 +144,38 @@ class ClientListExtractor {
|
|||||||
let nextElement: Element | null = null;
|
let nextElement: Element | null = null;
|
||||||
|
|
||||||
if ("querySelector" in currentElement) {
|
if ("querySelector" in currentElement) {
|
||||||
|
// Handle XPath vs CSS selector
|
||||||
|
if (
|
||||||
|
parts[i].startsWith("//") ||
|
||||||
|
parts[i].startsWith("/") ||
|
||||||
|
parts[i].startsWith("./")
|
||||||
|
) {
|
||||||
|
nextElement = this.evaluateXPath(currentElement, parts[i]);
|
||||||
|
} else {
|
||||||
nextElement = currentElement.querySelector(parts[i]);
|
nextElement = currentElement.querySelector(parts[i]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (
|
if (
|
||||||
!nextElement &&
|
!nextElement &&
|
||||||
"shadowRoot" in currentElement &&
|
"shadowRoot" in currentElement &&
|
||||||
(currentElement as Element).shadowRoot
|
(currentElement as Element).shadowRoot
|
||||||
) {
|
) {
|
||||||
|
if (
|
||||||
|
parts[i].startsWith("//") ||
|
||||||
|
parts[i].startsWith("/") ||
|
||||||
|
parts[i].startsWith("./")
|
||||||
|
) {
|
||||||
|
nextElement = this.evaluateXPath(
|
||||||
|
(currentElement as Element).shadowRoot as unknown as Document,
|
||||||
|
parts[i]
|
||||||
|
);
|
||||||
|
} else {
|
||||||
nextElement = (currentElement as Element).shadowRoot!.querySelector(
|
nextElement = (currentElement as Element).shadowRoot!.querySelector(
|
||||||
parts[i]
|
parts[i]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!nextElement && "children" in currentElement) {
|
if (!nextElement && "children" in currentElement) {
|
||||||
const children: any = Array.from(
|
const children: any = Array.from(
|
||||||
@@ -94,7 +183,18 @@ class ClientListExtractor {
|
|||||||
);
|
);
|
||||||
for (const child of children) {
|
for (const child of children) {
|
||||||
if (child.shadowRoot) {
|
if (child.shadowRoot) {
|
||||||
|
if (
|
||||||
|
parts[i].startsWith("//") ||
|
||||||
|
parts[i].startsWith("/") ||
|
||||||
|
parts[i].startsWith("./")
|
||||||
|
) {
|
||||||
|
nextElement = this.evaluateXPath(
|
||||||
|
child.shadowRoot as unknown as Document,
|
||||||
|
parts[i]
|
||||||
|
);
|
||||||
|
} else {
|
||||||
nextElement = child.shadowRoot.querySelector(parts[i]);
|
nextElement = child.shadowRoot.querySelector(parts[i]);
|
||||||
|
}
|
||||||
if (nextElement) break;
|
if (nextElement) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -111,8 +211,13 @@ class ClientListExtractor {
|
|||||||
selector: string
|
selector: string
|
||||||
): Element[] => {
|
): Element[] => {
|
||||||
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
if (!selector.includes(">>") && !selector.includes(":>>")) {
|
||||||
|
// Check if it's an XPath selector (starts with // or /)
|
||||||
|
if (selector.startsWith("//") || selector.startsWith("/")) {
|
||||||
|
return this.evaluateXPathAll(rootElement, selector);
|
||||||
|
} else {
|
||||||
return Array.from(rootElement.querySelectorAll(selector));
|
return Array.from(rootElement.querySelectorAll(selector));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
|
||||||
let currentElements: (Element | Document)[] = [rootElement];
|
let currentElements: (Element | Document)[] = [rootElement];
|
||||||
@@ -133,7 +238,14 @@ class ClientListExtractor {
|
|||||||
frameElement.contentDocument ||
|
frameElement.contentDocument ||
|
||||||
frameElement.contentWindow?.document;
|
frameElement.contentWindow?.document;
|
||||||
if (frameDoc) {
|
if (frameDoc) {
|
||||||
nextElements.push(...Array.from(frameDoc.querySelectorAll(part)));
|
// Handle XPath in iframe context
|
||||||
|
if (part.startsWith("//") || part.startsWith("/")) {
|
||||||
|
nextElements.push(...this.evaluateXPathAll(frameDoc, part));
|
||||||
|
} else {
|
||||||
|
nextElements.push(
|
||||||
|
...Array.from(frameDoc.querySelectorAll(part))
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn(
|
console.warn(
|
||||||
@@ -146,21 +258,43 @@ class ClientListExtractor {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ("querySelectorAll" in element) {
|
if ("querySelectorAll" in element) {
|
||||||
|
// Handle XPath vs CSS selector
|
||||||
|
if (part.startsWith("//") || part.startsWith("/")) {
|
||||||
|
nextElements.push(...this.evaluateXPathAll(element, part));
|
||||||
|
} else {
|
||||||
nextElements.push(...Array.from(element.querySelectorAll(part)));
|
nextElements.push(...Array.from(element.querySelectorAll(part)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ("shadowRoot" in element && (element as Element).shadowRoot) {
|
if ("shadowRoot" in element && (element as Element).shadowRoot) {
|
||||||
|
if (part.startsWith("//") || part.startsWith("/")) {
|
||||||
|
nextElements.push(
|
||||||
|
...this.evaluateXPathAll(
|
||||||
|
(element as Element).shadowRoot as unknown as Document,
|
||||||
|
part
|
||||||
|
)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
nextElements.push(
|
nextElements.push(
|
||||||
...Array.from(
|
...Array.from(
|
||||||
(element as Element).shadowRoot!.querySelectorAll(part)
|
(element as Element).shadowRoot!.querySelectorAll(part)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ("children" in element) {
|
if ("children" in element) {
|
||||||
const children = Array.from((element as Element).children || []);
|
const children = Array.from((element as Element).children || []);
|
||||||
for (const child of children) {
|
for (const child of children) {
|
||||||
if (child.shadowRoot) {
|
if (child.shadowRoot) {
|
||||||
|
if (part.startsWith("//") || part.startsWith("/")) {
|
||||||
|
nextElements.push(
|
||||||
|
...this.evaluateXPathAll(
|
||||||
|
child.shadowRoot as unknown as Document,
|
||||||
|
part
|
||||||
|
)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
nextElements.push(
|
nextElements.push(
|
||||||
...Array.from(child.shadowRoot.querySelectorAll(part))
|
...Array.from(child.shadowRoot.querySelectorAll(part))
|
||||||
);
|
);
|
||||||
@@ -169,6 +303,7 @@ class ClientListExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
currentElements = nextElements;
|
currentElements = nextElements;
|
||||||
}
|
}
|
||||||
@@ -193,36 +328,67 @@ class ClientListExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (attribute === "innerText") {
|
if (attribute === "innerText") {
|
||||||
return (element as HTMLElement).innerText?.trim() || null;
|
// First try standard innerText/textContent
|
||||||
} else if (attribute === "innerHTML") {
|
let textContent =
|
||||||
return element.innerHTML?.trim() || null;
|
(element as HTMLElement).innerText?.trim() ||
|
||||||
} else if (attribute === "src" || attribute === "href") {
|
(element as HTMLElement).textContent?.trim();
|
||||||
if (attribute === "href" && element.tagName !== "A") {
|
|
||||||
const parentElement = element.parentElement;
|
// If empty, check for common data attributes that might contain the text
|
||||||
if (parentElement && parentElement.tagName === "A") {
|
if (!textContent) {
|
||||||
const parentHref = parentElement.getAttribute("href");
|
// Check for data-* attributes that commonly contain text values
|
||||||
if (parentHref) {
|
const dataAttributes = [
|
||||||
try {
|
"data-600",
|
||||||
return new URL(parentHref, baseURL).href;
|
"data-text",
|
||||||
} catch (e) {
|
"data-label",
|
||||||
return parentHref;
|
"data-value",
|
||||||
}
|
"data-content",
|
||||||
|
];
|
||||||
|
for (const attr of dataAttributes) {
|
||||||
|
const dataValue = element.getAttribute(attr);
|
||||||
|
if (dataValue && dataValue.trim()) {
|
||||||
|
textContent = dataValue.trim();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return textContent || null;
|
||||||
|
} else if (attribute === "innerHTML") {
|
||||||
|
return element.innerHTML?.trim() || null;
|
||||||
|
} else if (attribute === "href") {
|
||||||
|
// For href, we need to find the anchor tag if the current element isn't one
|
||||||
|
let anchorElement = element;
|
||||||
|
|
||||||
|
// If current element is not an anchor, look for parent anchor
|
||||||
|
if (element.tagName !== "A") {
|
||||||
|
anchorElement =
|
||||||
|
element.closest("a") ||
|
||||||
|
element.parentElement?.closest("a") ||
|
||||||
|
element;
|
||||||
|
}
|
||||||
|
|
||||||
|
const hrefValue = anchorElement.getAttribute("href");
|
||||||
|
if (!hrefValue || hrefValue.trim() === "") {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return new URL(hrefValue, baseURL).href;
|
||||||
|
} catch (e) {
|
||||||
|
console.warn("Error creating URL from", hrefValue, e);
|
||||||
|
return hrefValue;
|
||||||
|
}
|
||||||
|
} else if (attribute === "src") {
|
||||||
const attrValue = element.getAttribute(attribute);
|
const attrValue = element.getAttribute(attribute);
|
||||||
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
||||||
|
|
||||||
if (!dataAttr || dataAttr.trim() === "") {
|
if (!dataAttr || dataAttr.trim() === "") {
|
||||||
if (attribute === "src") {
|
|
||||||
const style = window.getComputedStyle(element as HTMLElement);
|
const style = window.getComputedStyle(element as HTMLElement);
|
||||||
const bgImage = style.backgroundImage;
|
const bgImage = style.backgroundImage;
|
||||||
if (bgImage && bgImage !== "none") {
|
if (bgImage && bgImage !== "none") {
|
||||||
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
||||||
return matches ? new URL(matches[1], baseURL).href : null;
|
return matches ? new URL(matches[1], baseURL).href : null;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,187 +402,8 @@ class ClientListExtractor {
|
|||||||
return element.getAttribute(attribute);
|
return element.getAttribute(attribute);
|
||||||
};
|
};
|
||||||
|
|
||||||
private findTableAncestor = (
|
private convertFields = (fields: any): Record<string, Field> => {
|
||||||
element: Element
|
const convertedFields: Record<string, Field> = {};
|
||||||
): { type: string; element: Element } | null => {
|
|
||||||
let currentElement: Element | null = element;
|
|
||||||
const MAX_DEPTH = 5;
|
|
||||||
let depth = 0;
|
|
||||||
|
|
||||||
while (currentElement && depth < MAX_DEPTH) {
|
|
||||||
if (currentElement.getRootNode() instanceof ShadowRoot) {
|
|
||||||
currentElement = (currentElement.getRootNode() as ShadowRoot).host;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (currentElement.tagName === "TD") {
|
|
||||||
return { type: "TD", element: currentElement };
|
|
||||||
} else if (currentElement.tagName === "TR") {
|
|
||||||
return { type: "TR", element: currentElement };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (
|
|
||||||
currentElement.tagName === "IFRAME" ||
|
|
||||||
currentElement.tagName === "FRAME"
|
|
||||||
) {
|
|
||||||
try {
|
|
||||||
const frameElement = currentElement as
|
|
||||||
| HTMLIFrameElement
|
|
||||||
| HTMLFrameElement;
|
|
||||||
currentElement = frameElement.contentDocument?.body || null;
|
|
||||||
} catch (e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
currentElement = currentElement.parentElement;
|
|
||||||
}
|
|
||||||
depth++;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
};
|
|
||||||
|
|
||||||
private getCellIndex = (td: Element): number => {
|
|
||||||
if (td.getRootNode() instanceof ShadowRoot) {
|
|
||||||
const shadowRoot = td.getRootNode() as ShadowRoot;
|
|
||||||
const allCells = Array.from(shadowRoot.querySelectorAll("td"));
|
|
||||||
return allCells.indexOf(td as HTMLTableCellElement);
|
|
||||||
}
|
|
||||||
|
|
||||||
let index = 0;
|
|
||||||
let sibling = td;
|
|
||||||
while ((sibling = sibling.previousElementSibling as Element)) {
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
};
|
|
||||||
|
|
||||||
private hasThElement = (
|
|
||||||
row: Element,
|
|
||||||
tableFields: Record<string, TableField>
|
|
||||||
): boolean => {
|
|
||||||
for (const [_, { selector }] of Object.entries(tableFields)) {
|
|
||||||
const element = this.queryElement(row, selector);
|
|
||||||
if (element) {
|
|
||||||
let current: Element | ShadowRoot | Document | null = element;
|
|
||||||
while (current && current !== row) {
|
|
||||||
if (current.getRootNode() instanceof ShadowRoot) {
|
|
||||||
current = (current.getRootNode() as ShadowRoot).host;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((current as Element).tagName === "TH") return true;
|
|
||||||
|
|
||||||
if (
|
|
||||||
(current as Element).tagName === "IFRAME" ||
|
|
||||||
(current as Element).tagName === "FRAME"
|
|
||||||
) {
|
|
||||||
try {
|
|
||||||
const frameElement = current as
|
|
||||||
| HTMLIFrameElement
|
|
||||||
| HTMLFrameElement;
|
|
||||||
current = frameElement.contentDocument?.body || null;
|
|
||||||
} catch (e) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
current = (current as Element).parentElement;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
|
|
||||||
private filterRowsBasedOnTag = (
|
|
||||||
rows: Element[],
|
|
||||||
tableFields: Record<string, TableField>
|
|
||||||
): Element[] => {
|
|
||||||
for (const row of rows) {
|
|
||||||
if (this.hasThElement(row, tableFields)) {
|
|
||||||
return rows;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return rows.filter((row) => {
|
|
||||||
const directTH = row.getElementsByTagName("TH").length === 0;
|
|
||||||
const shadowTH = row.shadowRoot
|
|
||||||
? row.shadowRoot.querySelector("th") === null
|
|
||||||
: true;
|
|
||||||
return directTH && shadowTH;
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
private calculateClassSimilarity = (
|
|
||||||
classList1: string[],
|
|
||||||
classList2: string[]
|
|
||||||
): number => {
|
|
||||||
const set1 = new Set(classList1);
|
|
||||||
const set2 = new Set(classList2);
|
|
||||||
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
||||||
const union = new Set([...set1, ...set2]);
|
|
||||||
return intersection.size / union.size;
|
|
||||||
};
|
|
||||||
|
|
||||||
private findSimilarElements = (
|
|
||||||
baseElement: Element,
|
|
||||||
document: Document,
|
|
||||||
similarityThreshold: number = 0.7
|
|
||||||
): Element[] => {
|
|
||||||
const baseClasses = Array.from(baseElement.classList);
|
|
||||||
if (baseClasses.length === 0) return [];
|
|
||||||
|
|
||||||
const allElements: Element[] = [];
|
|
||||||
|
|
||||||
allElements.push(
|
|
||||||
...Array.from(document.getElementsByTagName(baseElement.tagName))
|
|
||||||
);
|
|
||||||
|
|
||||||
if (baseElement.getRootNode() instanceof ShadowRoot) {
|
|
||||||
const shadowHost = (baseElement.getRootNode() as ShadowRoot).host;
|
|
||||||
allElements.push(
|
|
||||||
...Array.from(shadowHost.getElementsByTagName(baseElement.tagName))
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const frames = [
|
|
||||||
...Array.from(document.getElementsByTagName("iframe")),
|
|
||||||
...Array.from(document.getElementsByTagName("frame")),
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const frame of frames) {
|
|
||||||
try {
|
|
||||||
const frameElement = frame as HTMLIFrameElement | HTMLFrameElement;
|
|
||||||
const frameDoc =
|
|
||||||
frameElement.contentDocument || frameElement.contentWindow?.document;
|
|
||||||
if (frameDoc) {
|
|
||||||
allElements.push(
|
|
||||||
...Array.from(frameDoc.getElementsByTagName(baseElement.tagName))
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(
|
|
||||||
`Cannot access ${frame.tagName.toLowerCase()} content:`,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return allElements.filter((element) => {
|
|
||||||
if (element === baseElement) return false;
|
|
||||||
const similarity = this.calculateClassSimilarity(
|
|
||||||
baseClasses,
|
|
||||||
Array.from(element.classList)
|
|
||||||
);
|
|
||||||
return similarity >= similarityThreshold;
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
private convertFields = (
|
|
||||||
fields: any
|
|
||||||
): Record<string, { selector: string; attribute: string }> => {
|
|
||||||
const convertedFields: Record<
|
|
||||||
string,
|
|
||||||
{ selector: string; attribute: string }
|
|
||||||
> = {};
|
|
||||||
|
|
||||||
for (const [key, field] of Object.entries(fields)) {
|
for (const [key, field] of Object.entries(fields)) {
|
||||||
const typedField = field as TextStep;
|
const typedField = field as TextStep;
|
||||||
@@ -439,285 +426,134 @@ class ClientListExtractor {
|
|||||||
// Convert fields to the format expected by the extraction logic
|
// Convert fields to the format expected by the extraction logic
|
||||||
const convertedFields = this.convertFields(fields);
|
const convertedFields = this.convertFields(fields);
|
||||||
|
|
||||||
// Get all container elements matching the list selector
|
// Step 1: Get all container elements matching the list selector
|
||||||
let containers = this.queryElementAll(iframeDocument, listSelector);
|
const containers = this.queryElementAll(iframeDocument, listSelector);
|
||||||
|
|
||||||
if (containers.length === 0) {
|
if (containers.length === 0) {
|
||||||
console.warn("No containers found for listSelector:", listSelector);
|
console.warn("❌ No containers found for listSelector:", listSelector);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enhanced container discovery: find similar elements if we need more containers
|
// Step 2: Extract data from each container up to the limit
|
||||||
if (limit > 1 && containers.length === 1) {
|
const extractedData: ExtractedListData[] = [];
|
||||||
const baseContainer = containers[0];
|
const containersToProcess = Math.min(containers.length, limit);
|
||||||
const similarContainers = this.findSimilarElements(
|
|
||||||
baseContainer,
|
|
||||||
iframeDocument,
|
|
||||||
0.7
|
|
||||||
);
|
|
||||||
|
|
||||||
if (similarContainers.length > 0) {
|
|
||||||
const newContainers = similarContainers.filter(
|
|
||||||
(container) => !container.matches(listSelector)
|
|
||||||
);
|
|
||||||
containers = [...containers, ...newContainers];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Analyze fields for table vs non-table context
|
|
||||||
const containerFields: ContainerFields[] = containers.map(() => ({
|
|
||||||
tableFields: {},
|
|
||||||
nonTableFields: {},
|
|
||||||
}));
|
|
||||||
|
|
||||||
containers.forEach((container, containerIndex) => {
|
|
||||||
for (const [label, field] of Object.entries(convertedFields)) {
|
|
||||||
const sampleElement = this.queryElement(container, field.selector);
|
|
||||||
|
|
||||||
if (sampleElement) {
|
|
||||||
const ancestor = this.findTableAncestor(sampleElement);
|
|
||||||
if (ancestor) {
|
|
||||||
containerFields[containerIndex].tableFields[label] = {
|
|
||||||
...field,
|
|
||||||
tableContext: ancestor.type,
|
|
||||||
cellIndex:
|
|
||||||
ancestor.type === "TD"
|
|
||||||
? this.getCellIndex(ancestor.element)
|
|
||||||
: -1,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
containerFields[containerIndex].nonTableFields[label] = field;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
containerFields[containerIndex].nonTableFields[label] = field;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Extract table data
|
|
||||||
const tableData: ExtractedListData[] = [];
|
|
||||||
for (
|
for (
|
||||||
let containerIndex = 0;
|
let containerIndex = 0;
|
||||||
containerIndex < containers.length;
|
containerIndex < containersToProcess;
|
||||||
containerIndex++
|
containerIndex++
|
||||||
) {
|
) {
|
||||||
const container = containers[containerIndex];
|
const container = containers[containerIndex];
|
||||||
const { tableFields } = containerFields[containerIndex];
|
|
||||||
|
|
||||||
if (Object.keys(tableFields).length > 0) {
|
|
||||||
const firstField = Object.values(tableFields)[0];
|
|
||||||
const firstElement = this.queryElement(
|
|
||||||
container,
|
|
||||||
firstField.selector
|
|
||||||
);
|
|
||||||
let tableContext: Element | null = firstElement;
|
|
||||||
|
|
||||||
// Find the table context
|
|
||||||
while (
|
|
||||||
tableContext &&
|
|
||||||
tableContext.tagName !== "TABLE" &&
|
|
||||||
tableContext !== container
|
|
||||||
) {
|
|
||||||
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
|
||||||
tableContext = (tableContext.getRootNode() as ShadowRoot).host;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (
|
|
||||||
tableContext.tagName === "IFRAME" ||
|
|
||||||
tableContext.tagName === "FRAME"
|
|
||||||
) {
|
|
||||||
try {
|
|
||||||
const frameElement = tableContext as
|
|
||||||
| HTMLIFrameElement
|
|
||||||
| HTMLFrameElement;
|
|
||||||
tableContext = frameElement.contentDocument?.body || null;
|
|
||||||
} catch (e) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tableContext = tableContext.parentElement;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tableContext) {
|
|
||||||
const rows: Element[] = [];
|
|
||||||
rows.push(...Array.from(tableContext.getElementsByTagName("TR")));
|
|
||||||
|
|
||||||
if (
|
|
||||||
tableContext.tagName === "IFRAME" ||
|
|
||||||
tableContext.tagName === "FRAME"
|
|
||||||
) {
|
|
||||||
try {
|
|
||||||
const frameElement = tableContext as
|
|
||||||
| HTMLIFrameElement
|
|
||||||
| HTMLFrameElement;
|
|
||||||
const frameDoc =
|
|
||||||
frameElement.contentDocument ||
|
|
||||||
frameElement.contentWindow?.document;
|
|
||||||
if (frameDoc) {
|
|
||||||
rows.push(...Array.from(frameDoc.getElementsByTagName("TR")));
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(
|
|
||||||
`Cannot access ${tableContext.tagName.toLowerCase()} rows:`,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const processedRows = this.filterRowsBasedOnTag(rows, tableFields);
|
|
||||||
|
|
||||||
for (
|
|
||||||
let rowIndex = 0;
|
|
||||||
rowIndex < Math.min(processedRows.length, limit);
|
|
||||||
rowIndex++
|
|
||||||
) {
|
|
||||||
const record: ExtractedListData = {};
|
const record: ExtractedListData = {};
|
||||||
const currentRow = processedRows[rowIndex];
|
|
||||||
|
|
||||||
for (const [
|
// Step 3: For each field, extract data from the current container
|
||||||
label,
|
for (const [label, { selector, attribute }] of Object.entries(
|
||||||
{ selector, attribute, cellIndex },
|
convertedFields
|
||||||
] of Object.entries(tableFields)) {
|
)) {
|
||||||
let element: Element | null = null;
|
let element: Element | null = null;
|
||||||
|
|
||||||
if (cellIndex !== undefined && cellIndex >= 0) {
|
// CORRECT APPROACH: Create indexed absolute XPath
|
||||||
let td: Element | null =
|
if (selector.startsWith("//")) {
|
||||||
currentRow.children[cellIndex] || null;
|
// Convert the absolute selector to target the specific container instance
|
||||||
|
const indexedSelector = this.createIndexedXPath(
|
||||||
|
selector,
|
||||||
|
listSelector,
|
||||||
|
containerIndex + 1
|
||||||
|
);
|
||||||
|
|
||||||
if (!td && currentRow.shadowRoot) {
|
element = this.evaluateXPathSingle(iframeDocument, indexedSelector);
|
||||||
const shadowCells = currentRow.shadowRoot.children;
|
|
||||||
if (shadowCells && shadowCells.length > cellIndex) {
|
|
||||||
td = shadowCells[cellIndex];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (td) {
|
|
||||||
element = this.queryElement(td, selector);
|
|
||||||
|
|
||||||
if (
|
|
||||||
!element &&
|
|
||||||
selector
|
|
||||||
.split(/(?:>>|:>>)/)
|
|
||||||
.pop()
|
|
||||||
?.includes("td:nth-child")
|
|
||||||
) {
|
|
||||||
element = td;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!element) {
|
|
||||||
const tagOnlySelector = selector.split(".")[0];
|
|
||||||
element = this.queryElement(td, tagOnlySelector);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!element) {
|
|
||||||
let currentElement: Element | null = td;
|
|
||||||
while (
|
|
||||||
currentElement &&
|
|
||||||
currentElement.children.length > 0
|
|
||||||
) {
|
|
||||||
let foundContentChild = false;
|
|
||||||
for (const child of Array.from(
|
|
||||||
currentElement.children
|
|
||||||
)) {
|
|
||||||
if (this.extractValue(child, attribute)) {
|
|
||||||
currentElement = child;
|
|
||||||
foundContentChild = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!foundContentChild) break;
|
|
||||||
}
|
|
||||||
element = currentElement;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
element = this.queryElement(currentRow, selector);
|
// Fallback for non-XPath selectors
|
||||||
|
element = this.queryElement(container, selector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 4: Extract the value from the found element
|
||||||
if (element) {
|
if (element) {
|
||||||
const value = this.extractValue(element, attribute);
|
const value = this.extractValue(element, attribute);
|
||||||
if (value !== null && value !== "") {
|
if (value !== null && value !== "") {
|
||||||
record[label] = value;
|
record[label] = value;
|
||||||
} else {
|
} else {
|
||||||
console.warn(
|
console.warn(` ⚠️ Empty value for "${label}"`);
|
||||||
`❌ No value for ${label} in row ${rowIndex + 1}`
|
|
||||||
);
|
|
||||||
record[label] = "";
|
record[label] = "";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
console.warn(
|
console.warn(` ❌ Element not found for "${label}"`);
|
||||||
`❌ Element not found for ${label} with selector:`,
|
|
||||||
selector
|
|
||||||
);
|
|
||||||
record[label] = "";
|
record[label] = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 5: Add record if it has any non-empty values
|
||||||
if (Object.values(record).some((value) => value !== "")) {
|
if (Object.values(record).some((value) => value !== "")) {
|
||||||
tableData.push(record);
|
extractedData.push(record);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract non-table data
|
|
||||||
const nonTableData: ExtractedListData[] = [];
|
|
||||||
for (
|
|
||||||
let containerIndex = 0;
|
|
||||||
containerIndex < containers.length;
|
|
||||||
containerIndex++
|
|
||||||
) {
|
|
||||||
if (nonTableData.length >= limit) break;
|
|
||||||
|
|
||||||
const container = containers[containerIndex];
|
|
||||||
const { nonTableFields } = containerFields[containerIndex];
|
|
||||||
|
|
||||||
if (Object.keys(nonTableFields).length > 0) {
|
|
||||||
const record: ExtractedListData = {};
|
|
||||||
|
|
||||||
for (const [label, { selector, attribute }] of Object.entries(
|
|
||||||
nonTableFields
|
|
||||||
)) {
|
|
||||||
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
|
||||||
const element = this.queryElement(container, relativeSelector);
|
|
||||||
|
|
||||||
if (element) {
|
|
||||||
const value = this.extractValue(element, attribute);
|
|
||||||
if (value !== null && value !== "") {
|
|
||||||
record[label] = value;
|
|
||||||
} else {
|
} else {
|
||||||
console.warn(
|
console.warn(
|
||||||
`❌ No value for ${label} in container ${containerIndex + 1}`
|
` ⚠️ Skipping empty record for container ${containerIndex + 1}`
|
||||||
);
|
);
|
||||||
record[label] = "";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.warn(
|
|
||||||
`❌ Element not found for ${label} with selector:`,
|
|
||||||
selector
|
|
||||||
);
|
|
||||||
record[label] = "";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Object.values(record).some((value) => value !== "")) {
|
|
||||||
nonTableData.push(record);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Combine and limit results
|
|
||||||
const extractedData = [...tableData, ...nonTableData].slice(0, limit);
|
|
||||||
|
|
||||||
return extractedData;
|
return extractedData;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error in client-side extractListData:", error);
|
console.error("💥 Error in client-side extractListData:", error);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Create indexed XPath for specific container instance
|
||||||
|
private createIndexedXPath(
|
||||||
|
childSelector: string,
|
||||||
|
listSelector: string,
|
||||||
|
containerIndex: number
|
||||||
|
): string {
|
||||||
|
// Check if the child selector contains the list selector pattern
|
||||||
|
if (childSelector.includes(listSelector.replace("//", ""))) {
|
||||||
|
// Replace the list selector part with indexed version
|
||||||
|
const listPattern = listSelector.replace("//", "");
|
||||||
|
const indexedListSelector = `(${listSelector})[${containerIndex}]`;
|
||||||
|
|
||||||
|
const indexedSelector = childSelector.replace(
|
||||||
|
`//${listPattern}`,
|
||||||
|
indexedListSelector
|
||||||
|
);
|
||||||
|
|
||||||
|
return indexedSelector;
|
||||||
|
} else {
|
||||||
|
// If pattern doesn't match, create a more generic indexed selector
|
||||||
|
// This is a fallback approach
|
||||||
|
console.warn(` ⚠️ Pattern doesn't match, using fallback approach`);
|
||||||
|
return `(${listSelector})[${containerIndex}]${childSelector.replace(
|
||||||
|
"//",
|
||||||
|
"/"
|
||||||
|
)}`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper method for single XPath evaluation
|
||||||
|
private evaluateXPathSingle = (
|
||||||
|
document: Document,
|
||||||
|
xpath: string
|
||||||
|
): Element | null => {
|
||||||
|
try {
|
||||||
|
const result = document.evaluate(
|
||||||
|
xpath,
|
||||||
|
document,
|
||||||
|
null,
|
||||||
|
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
const element = result.singleNodeValue as Element | null;
|
||||||
|
|
||||||
|
if (!element) {
|
||||||
|
console.warn(`❌ XPath found no element for: ${xpath}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return element;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("❌ XPath evaluation failed:", xpath, error);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export const clientListExtractor = new ClientListExtractor();
|
export const clientListExtractor = new ClientListExtractor();
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user