feat(scraping): change lookup impl, add randomize sleep (#110)

Co-authored-by: Jef LeCompte <jeffreylec@gmail.com>
This commit is contained in:
Mark Dietzer
2020-09-20 10:52:37 -07:00
committed by GitHub
parent 2e5d13bda5
commit dc0f710674
5 changed files with 26 additions and 29 deletions
+19 -24
View File
@@ -5,11 +5,28 @@ import {Config} from './config';
import {Store, Stores} from './store/model';
import {Logger} from './logger';
import {lookup} from './store';
import async from 'async';
import {Browser} from 'puppeteer';
puppeteer.use(stealthPlugin());
puppeteer.use(adblockerPlugin({blockTrackers: true}));
/**
 * Picks a randomized delay between two lookups.
 *
 * The value is drawn uniformly from the interval that starts at
 * `Config.browser.minSleep` and extends towards `Config.browser.maxSleep`
 * (`Math.random()` never returns 1, so the `maxSleep` end is exclusive).
 * The result is not rounded and may therefore be fractional.
 *
 * @returns The delay to wait before the next lookup.
 */
function getSleepTime() {
  const {minSleep, maxSleep} = Config.browser;
  // Width of the interval the random offset is scaled into.
  const spread = maxSleep - minSleep;
  return minSleep + Math.random() * spread;
}
/**
 * Runs a single lookup for `store` and then schedules the next run of itself.
 *
 * A failing lookup is logged and otherwise ignored, so the loop for this
 * store keeps going regardless of the outcome. The pause before the next
 * run comes from `getSleepTime()` and is re-drawn on every iteration.
 *
 * @param browser - Browser instance handed through to `lookup`.
 * @param store - Store to look up; its `name` is used to tag log lines.
 */
async function tryLookupAndLoop(browser: Browser, store: Store) {
  Logger.debug(`[${store.name}] Starting lookup...`);

  try {
    await lookup(browser, store);
  } catch (error) {
    // Deliberately best-effort: report the failure and fall through to
    // re-arm the timer below.
    Logger.error(error);
  }

  const delay = getSleepTime();
  Logger.debug(`[${store.name}] Lookup done, next one in ${delay} ms`);

  // Re-arm via a timer rather than awaiting in a loop; the promise of the
  // next run is intentionally not awaited here.
  setTimeout(() => {
    void tryLookupAndLoop(browser, store);
  }, delay);
}
/**
* Starts the bot.
*/
@@ -22,32 +39,10 @@ async function main() {
}
});
const q = async.queue<Store>(async (store: Store, cb) => {
setTimeout(async () => {
try {
Logger.debug(`↗ scraping initialized - ${store.name}`);
await lookup(browser, store);
} catch (error) {
// Ignoring errors; more than likely due to rate limits
Logger.error(error);
} finally {
cb();
q.push(store);
}
}, Config.browser.rateLimitTimeout);
}, Stores.length);
for (const store of Stores) {
Logger.debug(store.links);
q.push(store);
if (Stores.length === 1) {
q.push(store);
} // Keep from completely draining
setTimeout(tryLookupAndLoop, getSleepTime(), browser, store);
}
await q.drain();
await browser.close();
}
/**