feat: add proxy rotation (settable per store) (#1026)

This commit is contained in:
Mark Dietzer
2020-12-01 19:25:48 -08:00
committed by GitHub
parent 65df944973
commit 490d44e1fd
8 changed files with 498 additions and 49 deletions
+13 -2
View File
@@ -5,9 +5,20 @@ export const adBlocker = new PuppeteerExtraPluginAdblocker({
blockTrackers: true
});
export async function disableBlockerInPage(page: Page) {
export async function enableBlockerInPage(page: Page) {
const blockerObject = await adBlocker.getBlocker();
if (blockerObject.isBlockingEnabled(page)) {
await blockerObject.disableBlockingInPage(page);
return;
}
await blockerObject.enableBlockingInPage(page);
}
export async function disableBlockerInPage(page: Page) {
const blockerObject = await adBlocker.getBlocker();
if (!blockerObject.isBlockingEnabled(page)) {
return;
}
await blockerObject.disableBlockingInPage(page);
}
+13 -1
View File
@@ -2,6 +2,7 @@ import {banner} from './banner';
import {config as config_} from 'dotenv';
import path from 'path';
import {readFileSync} from 'fs';
config_({path: path.resolve(__dirname, '../.env')});
@@ -354,6 +355,16 @@ const store = {
]),
stores: envOrArray(process.env.STORES, ['nvidia']).map((entry) => {
const [name, minPageSleep, maxPageSleep] = entry.match(/[^:]+/g) ?? [];
let proxyList;
try {
proxyList = readFileSync(`${name}.proxies`)
.toString()
.trim()
.split('\n')
.map((x) => x.trim());
} catch {}
return {
maxPageSleep: envOrNumberMax(
minPageSleep,
@@ -365,7 +376,8 @@ const store = {
maxPageSleep,
browser.minSleep
),
name: envOrString(name)
name: envOrString(name),
proxyList
};
})
};
-11
View File
@@ -1,25 +1,14 @@
import {startAPIServer, stopAPIServer} from './web';
import {Browser} from 'puppeteer';
import {adBlocker} from './adblocker';
import {config} from './config';
import {getSleepTime} from './util';
import {logger} from './logger';
import puppeteer from 'puppeteer-extra';
import resourceBlock from 'puppeteer-extra-plugin-block-resources';
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import {storeList} from './store/model';
import {tryLookupAndLoop} from './store';
puppeteer.use(stealthPlugin());
if (config.browser.lowBandwidth) {
puppeteer.use(
resourceBlock({
blockedTypes: new Set(['image', 'font'] as const)
})
);
} else {
puppeteer.use(adBlocker);
}
let browser: Browser | undefined;
+162 -27
View File
@@ -1,4 +1,4 @@
import {Browser, Page, Response} from 'puppeteer';
import {Browser, Page, PageEventObj, Request, Response} from 'puppeteer';
import {Link, Store, getStores} from './model';
import {Print, logger} from '../logger';
import {Selector, cardPrice, pageIncludesLabels} from './includes-labels';
@@ -9,18 +9,109 @@ import {
getSleepTime,
isStatusCodeInRange
} from '../util';
import {disableBlockerInPage, enableBlockerInPage} from '../adblocker';
import {config} from '../config';
import {disableBlockerInPage} from '../adblocker';
import {fetchLinks} from './fetch-links';
import {filterStoreLink} from './filter';
import open from 'open';
import {processBackoffDelay} from './model/helpers/backoff';
import {sendNotification} from '../notification';
import useProxy from 'puppeteer-page-proxy';
const inStock: Record<string, boolean> = {};
const linkBuilderLastRunTimes: Record<string, number> = {};
function nextProxy(store: Store) {
if (!store.proxyList) {
return;
}
if (store.currentProxyIndex === undefined) {
store.currentProxyIndex = 0;
}
store.currentProxyIndex++;
if (store.currentProxyIndex >= store.proxyList.length) {
store.currentProxyIndex = 0;
}
logger.info(
` [${store.name}] Next proxy index: ${store.currentProxyIndex} / Count: ${store.proxyList.length}`
);
return store.proxyList[store.currentProxyIndex];
}
async function handleLowBandwidth(request: Request) {
if (!config.browser.lowBandwidth) {
return false;
}
const typ = request.resourceType();
if (typ === 'font' || typ === 'image') {
try {
await request.abort();
} catch {}
return true;
}
return false;
}
async function handleProxy(request: Request, proxy?: string) {
if (!proxy) {
return false;
}
try {
await useProxy(request, proxy);
} catch (error: unknown) {
logger.error(error);
try {
await request.abort();
} catch {}
}
return true;
}
async function handleAdBlock(request: Request, adBlockRequestHandler: any) {
if (!adBlockRequestHandler) {
return false;
}
return new Promise((resolve) => {
const continueFunc = async () => {
resolve(false);
};
const abortFunc = async () => {
try {
await request.abort();
} catch {}
resolve(true);
};
const requestProxy = new Proxy(request, {
get(target, prop, receiver) {
if (prop === 'continue') {
return continueFunc;
}
if (prop === 'abort') {
return abortFunc;
}
return Reflect.get(target, prop, receiver);
}
});
adBlockRequestHandler(requestProxy);
});
}
/**
* Responsible for looking up information about a each product within
* a `Store`. It's important that we ignore `no-await-in-loop` here
@@ -34,6 +125,20 @@ async function lookup(browser: Browser, store: Store) {
return;
}
if (store.linksBuilder) {
logger.info(`[${store.name}] Running linksBuilder...`);
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
try {
await fetchLinks(store, browser);
linkBuilderLastRunTimes[store.name] = Date.now();
} catch (error: unknown) {
logger.error(error);
}
}
}
/* eslint-disable no-await-in-loop */
for (const link of store.links) {
if (!filterStoreLink(link)) {
@@ -45,23 +150,62 @@ async function lookup(browser: Browser, store: Store) {
continue;
}
const context = config.browser.isIncognito
const proxy = nextProxy(store);
const useAdBlock = !config.browser.lowBandwidth && !store.disableAdBlocker;
const customContext = config.browser.isIncognito;
const context = customContext
? await browser.createIncognitoBrowserContext()
: browser.defaultBrowserContext();
const page = config.browser.isIncognito
? await context.newPage()
: await browser.newPage();
const page = await context.newPage();
page.setDefaultNavigationTimeout(config.page.timeout);
await page.setUserAgent(getRandomUserAgent());
if (store.disableAdBlocker) {
try {
await disableBlockerInPage(page);
} catch (error: unknown) {
logger.error(error);
}
let adBlockRequestHandler: any;
let pageProxy;
if (useAdBlock) {
const onProxyFunc = (event: keyof PageEventObj, handler: any) => {
if (event !== 'request') {
page.on(event, handler);
return;
}
adBlockRequestHandler = handler;
};
pageProxy = new Proxy(page, {
get(target, prop, receiver) {
if (prop === 'on') {
return onProxyFunc;
}
return Reflect.get(target, prop, receiver);
}
});
await enableBlockerInPage(pageProxy);
}
await page.setRequestInterception(true);
page.on('request', async (request) => {
if (await handleLowBandwidth(request)) {
return;
}
if (await handleAdBlock(request, adBlockRequestHandler)) {
return;
}
if (await handleProxy(request, proxy)) {
return;
}
try {
await request.continue();
} catch {}
});
let statusCode = 0;
try {
@@ -74,7 +218,11 @@ async function lookup(browser: Browser, store: Store) {
);
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');
// Await client.send('Network.clearBrowserCache');
}
if (pageProxy) {
await disableBlockerInPage(pageProxy);
}
// Must apply backoff before closing the page, e.g. if CloudFlare is
@@ -82,7 +230,7 @@ async function lookup(browser: Browser, store: Store) {
// before redirecting to the next page
await processBackoffDelay(store, link, statusCode);
await closePage(page);
if (config.browser.isIncognito) {
if (customContext) {
await context.close();
}
}
@@ -223,19 +371,6 @@ export async function tryLookupAndLoop(browser: Browser, store: Store) {
return;
}
if (getStores().has(store.name) && store.linksBuilder) {
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
try {
await fetchLinks(store, browser);
linkBuilderLastRunTimes[store.name] = Date.now();
} catch (error: unknown) {
logger.error((error as Error).message);
}
}
}
logger.debug(`[${store.name}] Starting lookup...`);
try {
await lookup(browser, store);
+1
View File
@@ -236,6 +236,7 @@ export function updateStores() {
stores.set(storeData.name, store);
store.minPageSleep = storeData.minPageSleep;
store.maxPageSleep = storeData.maxPageSleep;
store.proxyList = storeData.proxyList;
} else {
logger.warn(`No store named ${storeData.name}, skipping.`);
}
+4 -1
View File
@@ -1,4 +1,4 @@
import {Browser, LoadEvent} from 'puppeteer';
import {Browser, BrowserContext, LoadEvent} from 'puppeteer';
export type Element = {
container?: string;
@@ -178,4 +178,7 @@ export type Store = {
waitUntil?: LoadEvent;
minPageSleep?: number;
maxPageSleep?: number;
proxyList?: string[];
currentProxyIndex?: number;
};