mirror of
https://github.com/opelly27/streetmerchant.git
synced 2026-05-20 05:17:35 +00:00
feat: add proxy rotation (settable per store) (#1026)
This commit is contained in:
+13
-2
@@ -5,9 +5,20 @@ export const adBlocker = new PuppeteerExtraPluginAdblocker({
|
||||
blockTrackers: true
|
||||
});
|
||||
|
||||
/**
 * Turns the shared ad blocker on for `page`.
 *
 * Does nothing when blocking is already enabled on the page, so calling it
 * more than once for the same page is harmless.
 *
 * @param page - Page to start blocking requests on.
 */
export async function enableBlockerInPage(page: Page) {
  const blockerObject = await adBlocker.getBlocker();
  if (blockerObject.isBlockingEnabled(page)) {
    // Already blocking: leave the page untouched rather than toggling it off.
    return;
  }

  await blockerObject.enableBlockingInPage(page);
}
|
||||
|
||||
/**
 * Turns the shared ad blocker off for `page`.
 *
 * Only asks the blocker to stop when it reports that blocking is currently
 * enabled on the page; otherwise nothing happens.
 *
 * @param page - Page to stop blocking requests on.
 */
export async function disableBlockerInPage(page: Page) {
  const blocker = await adBlocker.getBlocker();
  const currentlyBlocking = blocker.isBlockingEnabled(page);

  if (currentlyBlocking) {
    await blocker.disableBlockingInPage(page);
  }
}
|
||||
|
||||
+13
-1
@@ -2,6 +2,7 @@ import {banner} from './banner';
|
||||
|
||||
import {config as config_} from 'dotenv';
|
||||
import path from 'path';
|
||||
import {readFileSync} from 'fs';
|
||||
|
||||
config_({path: path.resolve(__dirname, '../.env')});
|
||||
|
||||
@@ -354,6 +355,16 @@ const store = {
|
||||
]),
|
||||
stores: envOrArray(process.env.STORES, ['nvidia']).map((entry) => {
|
||||
const [name, minPageSleep, maxPageSleep] = entry.match(/[^:]+/g) ?? [];
|
||||
|
||||
let proxyList;
|
||||
try {
|
||||
proxyList = readFileSync(`${name}.proxies`)
|
||||
.toString()
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map((x) => x.trim());
|
||||
} catch {}
|
||||
|
||||
return {
|
||||
maxPageSleep: envOrNumberMax(
|
||||
minPageSleep,
|
||||
@@ -365,7 +376,8 @@ const store = {
|
||||
maxPageSleep,
|
||||
browser.minSleep
|
||||
),
|
||||
name: envOrString(name)
|
||||
name: envOrString(name),
|
||||
proxyList
|
||||
};
|
||||
})
|
||||
};
|
||||
|
||||
@@ -1,25 +1,14 @@
|
||||
import {startAPIServer, stopAPIServer} from './web';
|
||||
import {Browser} from 'puppeteer';
|
||||
import {adBlocker} from './adblocker';
|
||||
import {config} from './config';
|
||||
import {getSleepTime} from './util';
|
||||
import {logger} from './logger';
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import resourceBlock from 'puppeteer-extra-plugin-block-resources';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import {storeList} from './store/model';
|
||||
import {tryLookupAndLoop} from './store';
|
||||
|
||||
puppeteer.use(stealthPlugin());
|
||||
if (config.browser.lowBandwidth) {
|
||||
puppeteer.use(
|
||||
resourceBlock({
|
||||
blockedTypes: new Set(['image', 'font'] as const)
|
||||
})
|
||||
);
|
||||
} else {
|
||||
puppeteer.use(adBlocker);
|
||||
}
|
||||
|
||||
let browser: Browser | undefined;
|
||||
|
||||
|
||||
+162
-27
@@ -1,4 +1,4 @@
|
||||
import {Browser, Page, Response} from 'puppeteer';
|
||||
import {Browser, Page, PageEventObj, Request, Response} from 'puppeteer';
|
||||
import {Link, Store, getStores} from './model';
|
||||
import {Print, logger} from '../logger';
|
||||
import {Selector, cardPrice, pageIncludesLabels} from './includes-labels';
|
||||
@@ -9,18 +9,109 @@ import {
|
||||
getSleepTime,
|
||||
isStatusCodeInRange
|
||||
} from '../util';
|
||||
import {disableBlockerInPage, enableBlockerInPage} from '../adblocker';
|
||||
import {config} from '../config';
|
||||
import {disableBlockerInPage} from '../adblocker';
|
||||
import {fetchLinks} from './fetch-links';
|
||||
import {filterStoreLink} from './filter';
|
||||
import open from 'open';
|
||||
import {processBackoffDelay} from './model/helpers/backoff';
|
||||
import {sendNotification} from '../notification';
|
||||
import useProxy from 'puppeteer-page-proxy';
|
||||
|
||||
const inStock: Record<string, boolean> = {};
|
||||
|
||||
const linkBuilderLastRunTimes: Record<string, number> = {};
|
||||
|
||||
/**
 * Advances the store's proxy rotation and returns the proxy to use next.
 *
 * The rotation position is stored on the store itself
 * (`store.currentProxyIndex`) and wraps back to the first entry once the end
 * of `store.proxyList` is passed.
 *
 * @param store - Store whose `proxyList` is rotated; its `currentProxyIndex`
 * is updated in place.
 * @returns The selected proxy, or `undefined` when the store has no proxies.
 */
function nextProxy(store: Store) {
  const proxies = store.proxyList;
  if (!proxies || proxies.length === 0) {
    return;
  }

  // The first call has no previous position, so it starts at the head of the
  // list instead of skipping entry 0 until the rotation wraps around.
  const previousIndex = store.currentProxyIndex;
  const candidate = previousIndex === undefined ? 0 : previousIndex + 1;

  // Also covers a stale index left over from a longer, since-replaced list.
  store.currentProxyIndex = candidate >= proxies.length ? 0 : candidate;

  logger.info(
    `ℹ [${store.name}] Next proxy index: ${store.currentProxyIndex} / Count: ${proxies.length}`
  );

  return proxies[store.currentProxyIndex];
}
|
||||
|
||||
/**
 * Request-interception step for low-bandwidth mode.
 *
 * When `config.browser.lowBandwidth` is set, requests whose resource type is
 * `font` or `image` are aborted.
 *
 * @param request - Intercepted request to inspect.
 * @returns `true` when the request was claimed (an abort was attempted),
 * `false` when the caller should keep processing it.
 */
async function handleLowBandwidth(request: Request) {
  if (config.browser.lowBandwidth) {
    const resourceKind = request.resourceType();
    const isHeavyResource = resourceKind === 'font' || resourceKind === 'image';

    if (isHeavyResource) {
      // Best-effort: a failed abort is ignored and the request still counts
      // as handled.
      try {
        await request.abort();
      } catch {}

      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Request-interception step that sends the request through `proxy`.
 *
 * @param request - Intercepted request to forward.
 * @param proxy - Proxy to use; when empty or missing the request is left
 * untouched.
 * @returns `true` when a proxy was supplied (the request was forwarded, or
 * aborted after a forwarding error), `false` when there is no proxy and the
 * caller should keep processing the request.
 */
async function handleProxy(request: Request, proxy?: string) {
  if (proxy) {
    try {
      await useProxy(request, proxy);
    } catch (error: unknown) {
      logger.error(error);

      // Forwarding failed: try to abort the request. A failed abort is
      // ignored on purpose.
      try {
        await request.abort();
      } catch {}
    }

    return true;
  }

  return false;
}
|
||||
|
||||
/**
 * Request-interception step that lets the ad blocker's own request handler
 * decide what happens to `request`.
 *
 * The handler receives a proxy of the request whose settling methods are
 * intercepted, so its decision is reported back instead of ending the
 * interception chain:
 * - `continue` is not forwarded; the step resolves `false` so later steps
 *   still see the request.
 * - `abort` aborts the real request (best-effort) and resolves `true`.
 * - `respond` is forwarded to the real request (best-effort) and resolves
 *   `true`. Without this the returned promise would stay pending forever for
 *   a handler that answers the request itself.
 *   NOTE(review): the ad blocker is believed to use `respond` for redirect
 *   rules — confirm against the blocker's request handler.
 *
 * @param request - Intercepted request to hand to the ad blocker.
 * @param adBlockRequestHandler - The blocker's captured `request` listener;
 * when falsy this step is skipped.
 * @returns `false` when there is no handler, otherwise a promise resolving to
 * `true` if the request was aborted or answered and `false` if it should
 * continue.
 */
async function handleAdBlock(request: Request, adBlockRequestHandler: any) {
  if (!adBlockRequestHandler) {
    return false;
  }

  return new Promise<boolean>((resolve) => {
    const continueFunc = async () => {
      resolve(false);
    };

    const abortFunc = async () => {
      try {
        await request.abort();
      } catch {}

      resolve(true);
    };

    const requestProxy = new Proxy(request, {
      get(target, prop, receiver) {
        if (prop === 'continue') {
          return continueFunc;
        }

        if (prop === 'abort') {
          return abortFunc;
        }

        const value = Reflect.get(target, prop, receiver);

        if (prop === 'respond' && typeof value === 'function') {
          // The handler is answering the request itself: run the real
          // `respond` and report the request as handled.
          return async (...args: unknown[]) => {
            try {
              await Reflect.apply(value, target, args);
            } catch {}

            resolve(true);
          };
        }

        return value;
      }
    });
    adBlockRequestHandler(requestProxy);
  });
}
|
||||
|
||||
/**
|
||||
* Responsible for looking up information about each product within
|
||||
* a `Store`. It's important that we ignore `no-await-in-loop` here
|
||||
@@ -34,6 +125,20 @@ async function lookup(browser: Browser, store: Store) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (store.linksBuilder) {
|
||||
logger.info(`[${store.name}] Running linksBuilder...`);
|
||||
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
|
||||
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
|
||||
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
|
||||
try {
|
||||
await fetchLinks(store, browser);
|
||||
linkBuilderLastRunTimes[store.name] = Date.now();
|
||||
} catch (error: unknown) {
|
||||
logger.error(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* eslint-disable no-await-in-loop */
|
||||
for (const link of store.links) {
|
||||
if (!filterStoreLink(link)) {
|
||||
@@ -45,23 +150,62 @@ async function lookup(browser: Browser, store: Store) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const context = config.browser.isIncognito
|
||||
const proxy = nextProxy(store);
|
||||
|
||||
const useAdBlock = !config.browser.lowBandwidth && !store.disableAdBlocker;
|
||||
const customContext = config.browser.isIncognito;
|
||||
|
||||
const context = customContext
|
||||
? await browser.createIncognitoBrowserContext()
|
||||
: browser.defaultBrowserContext();
|
||||
const page = config.browser.isIncognito
|
||||
? await context.newPage()
|
||||
: await browser.newPage();
|
||||
const page = await context.newPage();
|
||||
|
||||
page.setDefaultNavigationTimeout(config.page.timeout);
|
||||
await page.setUserAgent(getRandomUserAgent());
|
||||
|
||||
if (store.disableAdBlocker) {
|
||||
try {
|
||||
await disableBlockerInPage(page);
|
||||
} catch (error: unknown) {
|
||||
logger.error(error);
|
||||
}
|
||||
let adBlockRequestHandler: any;
|
||||
let pageProxy;
|
||||
if (useAdBlock) {
|
||||
const onProxyFunc = (event: keyof PageEventObj, handler: any) => {
|
||||
if (event !== 'request') {
|
||||
page.on(event, handler);
|
||||
return;
|
||||
}
|
||||
|
||||
adBlockRequestHandler = handler;
|
||||
};
|
||||
|
||||
pageProxy = new Proxy(page, {
|
||||
get(target, prop, receiver) {
|
||||
if (prop === 'on') {
|
||||
return onProxyFunc;
|
||||
}
|
||||
|
||||
return Reflect.get(target, prop, receiver);
|
||||
}
|
||||
});
|
||||
await enableBlockerInPage(pageProxy);
|
||||
}
|
||||
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', async (request) => {
|
||||
if (await handleLowBandwidth(request)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (await handleAdBlock(request, adBlockRequestHandler)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (await handleProxy(request, proxy)) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await request.continue();
|
||||
} catch {}
|
||||
});
|
||||
|
||||
let statusCode = 0;
|
||||
|
||||
try {
|
||||
@@ -74,7 +218,11 @@ async function lookup(browser: Browser, store: Store) {
|
||||
);
|
||||
const client = await page.target().createCDPSession();
|
||||
await client.send('Network.clearBrowserCookies');
|
||||
await client.send('Network.clearBrowserCache');
|
||||
// Await client.send('Network.clearBrowserCache');
|
||||
}
|
||||
|
||||
if (pageProxy) {
|
||||
await disableBlockerInPage(pageProxy);
|
||||
}
|
||||
|
||||
// Must apply backoff before closing the page, e.g. if CloudFlare is
|
||||
@@ -82,7 +230,7 @@ async function lookup(browser: Browser, store: Store) {
|
||||
// before redirecting to the next page
|
||||
await processBackoffDelay(store, link, statusCode);
|
||||
await closePage(page);
|
||||
if (config.browser.isIncognito) {
|
||||
if (customContext) {
|
||||
await context.close();
|
||||
}
|
||||
}
|
||||
@@ -223,19 +371,6 @@ export async function tryLookupAndLoop(browser: Browser, store: Store) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (getStores().has(store.name) && store.linksBuilder) {
|
||||
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
|
||||
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
|
||||
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
|
||||
try {
|
||||
await fetchLinks(store, browser);
|
||||
linkBuilderLastRunTimes[store.name] = Date.now();
|
||||
} catch (error: unknown) {
|
||||
logger.error((error as Error).message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug(`[${store.name}] Starting lookup...`);
|
||||
try {
|
||||
await lookup(browser, store);
|
||||
|
||||
@@ -236,6 +236,7 @@ export function updateStores() {
|
||||
stores.set(storeData.name, store);
|
||||
store.minPageSleep = storeData.minPageSleep;
|
||||
store.maxPageSleep = storeData.maxPageSleep;
|
||||
store.proxyList = storeData.proxyList;
|
||||
} else {
|
||||
logger.warn(`No store named ${storeData.name}, skipping.`);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import {Browser, LoadEvent} from 'puppeteer';
|
||||
import {Browser, BrowserContext, LoadEvent} from 'puppeteer';
|
||||
|
||||
export type Element = {
|
||||
container?: string;
|
||||
@@ -178,4 +178,7 @@ export type Store = {
|
||||
waitUntil?: LoadEvent;
|
||||
minPageSleep?: number;
|
||||
maxPageSleep?: number;
|
||||
|
||||
proxyList?: string[];
|
||||
currentProxyIndex?: number;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user