# Installation
-1. First, create a file named `.env` in the root folder of the project
-2. Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy all content of example env to your `.env` file.
-3. Choose your installation method below
+1. Create a root folder for your project (e.g. 'maxun')
+2. Create a file named `.env` in the root folder of the project
+3. An example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy all of its contents into your `.env` file.
+4. Choose your installation method below
### Docker Compose
-1. Copy paste the [docker-compose.yml file](https://github.com/getmaxun/maxun/blob/master/docker-compose.yml)
-2. Ensure you have setup the `.env` file
-3. Run the command below
+1. Copy the [docker-compose.yml file](https://github.com/getmaxun/maxun/blob/master/docker-compose.yml) into your root folder
+2. Ensure you have set up the `.env` file in that same folder
+3. Run the command below from a terminal
```
docker-compose up -d
```
diff --git a/docker-compose.yml b/docker-compose.yml
index 3c6e3a0f..874e48d6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -43,7 +43,7 @@ services:
#build:
#context: .
#dockerfile: server/Dockerfile
- image: getmaxun/maxun-backend:v0.0.7
+ image: getmaxun/maxun-backend:v0.0.9
ports:
- "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}"
env_file: .env
@@ -70,7 +70,7 @@ services:
#build:
#context: .
#dockerfile: Dockerfile
- image: getmaxun/maxun-frontend:v0.0.3
+ image: getmaxun/maxun-frontend:v0.0.5
ports:
- "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}"
env_file: .env
diff --git a/maxun-core/package.json b/maxun-core/package.json
index 36d06aa9..7c92d08e 100644
--- a/maxun-core/package.json
+++ b/maxun-core/package.json
@@ -1,6 +1,6 @@
{
"name": "maxun-core",
- "version": "0.0.6",
+ "version": "0.0.7",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",
diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js
index 09b6578b..a2009d78 100644
--- a/maxun-core/src/browserSide/scraper.js
+++ b/maxun-core/src/browserSide/scraper.js
@@ -265,41 +265,72 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const scrapedData = [];
while (scrapedData.length < limit) {
- // Get all parent elements matching the listSelector
- const parentElements = Array.from(document.querySelectorAll(listSelector));
+ let parentElements = Array.from(document.querySelectorAll(listSelector));
+
+ // If we only got one element or none, try a more generic approach
+ if (limit > 1 && parentElements.length <= 1) {
+ const [containerSelector, _] = listSelector.split('>').map(s => s.trim());
+ const container = document.querySelector(containerSelector);
+
+ if (container) {
+ const allChildren = Array.from(container.children);
+
+ const firstMatch = document.querySelector(listSelector);
+ if (firstMatch) {
+ // Get classes from the first matching element
+ const firstMatchClasses = Array.from(firstMatch.classList);
+
+ // Find similar elements by matching most of their classes
+ parentElements = allChildren.filter(element => {
+ const elementClasses = Array.from(element.classList);
- // Iterate through each parent element
- for (const parent of parentElements) {
- if (scrapedData.length >= limit) break;
- const record = {};
-
- // For each field, select the corresponding element within the parent
- for (const [label, { selector, attribute }] of Object.entries(fields)) {
- const fieldElement = parent.querySelector(selector);
-
- if (fieldElement) {
- if (attribute === 'innerText') {
- record[label] = fieldElement.innerText.trim();
- } else if (attribute === 'innerHTML') {
- record[label] = fieldElement.innerHTML.trim();
- } else if (attribute === 'src') {
- // Handle relative 'src' URLs
- const src = fieldElement.getAttribute('src');
- record[label] = src ? new URL(src, window.location.origin).href : null;
- } else if (attribute === 'href') {
- // Handle relative 'href' URLs
- const href = fieldElement.getAttribute('href');
- record[label] = href ? new URL(href, window.location.origin).href : null;
- } else {
- record[label] = fieldElement.getAttribute(attribute);
+ // Element should share at least 70% of classes with the first match
+ const commonClasses = firstMatchClasses.filter(cls =>
+ elementClasses.includes(cls));
+ return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7);
+ });
+ }
}
- }
}
- scrapedData.push(record);
- }
+
+ // Iterate through each parent element
+ for (const parent of parentElements) {
+ if (scrapedData.length >= limit) break;
+ const record = {};
+
+ // For each field, select the corresponding element within the parent
+ for (const [label, { selector, attribute }] of Object.entries(fields)) {
+ const fieldElement = parent.querySelector(selector);
+
+ if (fieldElement) {
+ if (attribute === 'innerText') {
+ record[label] = fieldElement.innerText.trim();
+ } else if (attribute === 'innerHTML') {
+ record[label] = fieldElement.innerHTML.trim();
+ } else if (attribute === 'src') {
+ // Handle relative 'src' URLs
+ const src = fieldElement.getAttribute('src');
+ record[label] = src ? new URL(src, window.location.origin).href : null;
+ } else if (attribute === 'href') {
+ // Handle relative 'href' URLs
+ const href = fieldElement.getAttribute('href');
+ record[label] = href ? new URL(href, window.location.origin).href : null;
+ } else {
+ record[label] = fieldElement.getAttribute(attribute);
+ }
+ }
+ }
+ scrapedData.push(record);
+ }
+
+ // If we've processed all available elements and still haven't reached the limit,
+ // break to avoid infinite loop
+ if (parentElements.length === 0 || scrapedData.length >= parentElements.length) {
+ break;
+ }
}
- return scrapedData
- };
+ return scrapedData;
+};
/**
diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts
index d1cc8318..c581954d 100644
--- a/maxun-core/src/interpret.ts
+++ b/maxun-core/src/interpret.ts
@@ -111,13 +111,21 @@ export default class Interpreter extends EventEmitter {
private async applyAdBlocker(page: Page): Promise- If you enable this option, every time this robot runs a task - successfully, its captured data will be appended to your - Google Sheet. -
+{t('integration_settings.descriptions.sync_info')}
> ) : ( <> {recording.google_sheet_email && (
Run the commands below
+ # cd to project directory (eg: maxun)
+
+ cd maxun
+
+
# pull latest changes
git pull origin master
@@ -228,6 +251,16 @@ export const NavBar: React.FC = ({ recordingName, isRecording }) =>
Run the commands below
+ # cd to project directory (eg: maxun)
+
+ cd maxun
+
+
+ # stop the working containers
+
+ docker-compose down
+
+
# pull latest docker images
docker-compose pull
@@ -270,7 +303,7 @@ export const NavBar: React.FC = ({ recordingName, isRecording }) =>
PaperProps={{ sx: { width: '180px' } }}
>
+
+
>
) : (
@@ -300,14 +390,80 @@ export const NavBar: React.FC = ({ recordingName, isRecording }) =>
'&:hover': { color: 'white', backgroundColor: 'red' }
}}>
- Discard
+ {t('navbar.recording.discard')}
>
)}
producthunt.com/topics/database'
+ })
+ }}/>
producthunt.com/topics/api, you can duplicate it to scrape similar pages
- like producthunt.com/topics/database without training a robot from scratch.
-
-