feat: enhanced lookup behaviour (#270)

Co-authored-by: Jef LeCompte <jeffreylec@gmail.com>
This commit is contained in:
Andrew Mackrodt
2020-09-25 23:29:10 +01:00
committed by GitHub
parent 7ef9d935c6
commit b868d1a483
17 changed files with 789 additions and 71 deletions
+58
View File
@@ -0,0 +1,58 @@
import {Link, Series, Store} from './model';
import {Logger, Print} from '../logger';
import {Browser} from 'puppeteer';
import cheerio from 'cheerio';
import {filterSeries} from './filter';
import {usingResponse} from '../util';
/**
 * Appends newly discovered links to a store, skipping every URL that the
 * store already tracks or that was already accepted earlier in `links`.
 *
 * Logs an error and leaves the store untouched when `links` is empty, and
 * returns silently when none of the candidates is new.
 *
 * @param store The store to extend; its `links` property is reassigned to a new array.
 * @param links Candidate links collected for `series`.
 * @param series The series the candidates belong to; only used in log output.
 */
function addNewLinks(store: Store, links: Link[], series: Series) {
	if (links.length === 0) {
		Logger.error(Print.message('NO STORE LINKS FOUND', series, store, true));
		return;
	}

	// Seeded with the store's URLs and extended as candidates are accepted, so
	// a URL that appears several times within `links` is only added once.
	const seenUrls = new Set(store.links.map(link => link.url));
	const newLinks = links.filter(link => {
		if (seenUrls.has(link.url)) {
			return false;
		}

		seenUrls.add(link.url);
		return true;
	});

	if (newLinks.length === 0) {
		return;
	}

	Logger.info(Print.message(`FOUND ${newLinks.length} STORE LINKS`, series, store, true));
	Logger.debug(JSON.stringify(newLinks, null, 2));

	store.links = store.links.concat(newLinks);
}
/**
 * Discovers product links for a store by loading each of its
 * `linksBuilder` URLs and handing the parsed document to the builder.
 *
 * Does nothing when the store has no `linksBuilder`. URLs whose series is
 * rejected by `filterSeries` are skipped. The remaining lookups are started
 * without awaiting one another; the returned promise resolves once all of
 * them have finished and rejects if any of them rejects.
 *
 * @param store The store to discover links for; results are merged in via `addNewLinks`.
 * @param browser The browser handed to `usingResponse` for loading each URL.
 */
export async function fetchLinks(store: Store, browser: Browser) {
	// Captured once so the async callbacks use this narrowed reference rather
	// than re-reading `store.linksBuilder` behind a non-null assertion.
	const {linksBuilder} = store;
	if (!linksBuilder) {
		return;
	}

	const promises = [];

	for (const {series, url} of linksBuilder.urls) {
		if (!filterSeries(series)) {
			continue;
		}

		Logger.info(Print.message('DETECTING STORE LINKS', series, store, true));

		promises.push(usingResponse(browser, url, async response => {
			const text = await response?.text();

			// Covers both a missing response and an empty body.
			if (!text) {
				Logger.error(Print.message('NO RESPONSE', series, store, true));
				return;
			}

			const docElement = cheerio.load(text).root();
			const links = linksBuilder.builder(docElement, series);
			addNewLinks(store, links, series);
		}));
	}

	await Promise.all(promises);
}
+1 -1
View File
@@ -40,7 +40,7 @@ function filterModel(model: Link['model']): boolean {
*
* @param series The series of the GPU
*/
function filterSeries(series: Link['series']): boolean {
export function filterSeries(series: Link['series']): boolean {
if (Config.store.showOnlySeries.length === 0) {
return true;
}
+82
View File
@@ -1,3 +1,85 @@
import {Element, LabelQuery} from './model';
import {Logger} from '../logger';
import {Page} from 'puppeteer';
/**
 * Describes which element of a page to read and how to read it.
 */
export type Selector = {
/** When true, an element without a positive offset width and height is treated as absent. */
requireVisible: boolean;
/** CSS selector handed to `document.querySelector`, so only the first match is read. */
selector: string;
/** Which property of the matched element is returned as its contents. */
type: 'innerHTML' | 'outerHTML' | 'textContent';
};
/**
 * Type guard telling a list of `Element` objects apart from the other
 * `LabelQuery` shapes.
 *
 * Only the first entry is inspected, and an empty array is never
 * considered an element array.
 *
 * @param query The label query to classify.
 * @returns `true` when `query` is a non-empty array whose first entry is an object.
 */
function isElementArray(query: LabelQuery): query is Element[] {
	if (!Array.isArray(query) || query.length === 0) {
		return false;
	}

	return typeof query[0] === 'object';
}
/**
 * Normalises any `LabelQuery` shape into a list of elements that all carry
 * an explicit container.
 *
 * - An array of elements is mapped entry by entry.
 * - Any other array is treated as the label texts of a single element that
 *   uses `defaultContainer`.
 * - A single element is wrapped in a one-entry list.
 *
 * @param query The label query to normalise.
 * @param defaultContainer Container used wherever the query does not name one.
 * @returns The query as elements with `container` always set.
 */
function getQueryAsElementArray(query: LabelQuery, defaultContainer: string): Array<Required<Element>> {
	const withContainer = (element: Element): Required<Element> => ({
		container: element.container ?? defaultContainer,
		text: element.text
	});

	if (isElementArray(query)) {
		return query.map(element => withContainer(element));
	}

	if (!Array.isArray(query)) {
		return [withContainer(query)];
	}

	return [{
		container: defaultContainer,
		text: query
	}];
}
/**
 * Reports whether at least one container described by `query` yields
 * contents for which `includesLabels` returns `true`.
 *
 * Every container is read with `options`, with only the selector replaced
 * by that container. A container that yields no contents counts as a
 * non-match. All containers are checked before the result is computed.
 *
 * @param page The page to inspect.
 * @param query The labels to look for and, optionally, where to look for them.
 * @param options Read settings; `options.selector` is the fallback container.
 */
export async function pageIncludesLabels(page: Page, query: LabelQuery, options: Selector) {
	const checks = getQueryAsElementArray(query, options.selector).map(async element => {
		const contents = await extractPageContents(page, {...options, selector: element.container}) ?? '';

		if (contents) {
			Logger.debug(contents);
			return includesLabels(contents, element.text);
		}

		return false;
	});

	const outcomes = await Promise.all(checks);

	return outcomes.includes(true);
}
/**
 * Reads the contents of the first element matching `selector.selector`
 * from within the page.
 *
 * @param page The page in which the lookup is evaluated.
 * @param selector Which element to read, whether it must be visible, and which property to return.
 * @returns The requested property of the element, or `null` when no element
 * matches or when `requireVisible` is set and the element does not have a
 * positive offset width and height.
 */
export async function extractPageContents(page: Page, selector: Selector): Promise<string | null> {
	// The callback is handed to `page.evaluate`, so it only relies on its own
	// argument and on page globals.
	return page.evaluate((options: Selector) => {
		// eslint-disable-next-line no-undef
		const element: globalThis.HTMLElement | null = document.querySelector(options.selector);
		if (!element) {
			return null;
		}

		const visible = element.offsetWidth > 0 && element.offsetHeight > 0;
		if (options.requireVisible && !visible) {
			return null;
		}

		if (options.type === 'innerHTML') {
			return element.innerHTML;
		}

		if (options.type === 'outerHTML') {
			return element.outerHTML;
		}

		if (options.type === 'textContent') {
			return element.textContent;
		}

		return 'Error: selector.type is unknown';
	}, selector);
}
/**
* Checks if DOM has any related text.
*
+85 -35
View File
@@ -1,15 +1,23 @@
import {Browser, Page, Response} from 'puppeteer';
import {Link, Store} from './model';
import {Logger, Print} from '../logger';
import {Selector, pageIncludesLabels} from './includes-labels';
import {closePage, delay, getSleepTime} from '../util';
import {Config} from '../config';
import {disableBlockerInPage} from '../adblocker';
import {filterStoreLink} from './filter';
import {includesLabels} from './includes-labels';
import open from 'open';
import {sendNotification} from '../notification';
/**
 * Backoff state tracked for a single store.
 */
type Backoff = {
// Incremented when a lookup receives a 403; decremented (while above zero) after a lookup that passes the status checks.
count: number;
// Value handed to `delay()` after a 403; doubled up to `Config.browser.maxBackoff` and halved down to `Config.browser.minBackoff`.
time: number;
};
// NOTE(review): no use of `inStock` is visible in this part of the file — presumably a per-key in-stock flag; confirm against its callers.
const inStock: Record<string, boolean> = {};
// Backoff state per store, keyed by `store.name`; entries are created on a store's first lookup.
const storeBackoff: Record<string, Backoff> = {};
/**
* Responsible for looking up information about each product within
* a `Store`. It's important that we ignore `no-await-in-loop` here
@@ -34,6 +42,14 @@ async function lookup(browser: Browser, store: Store) {
page.setDefaultNavigationTimeout(Config.page.navigationTimeout);
await page.setUserAgent(Config.page.userAgent);
if (store.disableAdBlocker) {
try {
await disableBlockerInPage(page);
} catch (error) {
Logger.error(error);
}
}
try {
await lookupCard(browser, store, page, link);
} catch (error) {
@@ -49,7 +65,41 @@ async function lookupCard(browser: Browser, store: Store, page: Page, link: Link
const givenWaitFor = store.waitUntil ? store.waitUntil : 'networkidle0';
const response: Response | null = await page.goto(link.url, {waitUntil: givenWaitFor});
if (await lookupCardInStock(store, page)) {
if (!response) {
Logger.debug(Print.noResponse(link, store, true));
}
let backoff = storeBackoff[store.name];
if (!backoff) {
backoff = {count: 0, time: Config.browser.minBackoff};
storeBackoff[store.name] = backoff;
}
if (response?.status() === 403) {
Logger.warn(Print.backoff(link, store, backoff.time, true));
await delay(backoff.time);
backoff.count++;
backoff.time = Math.min(backoff.time * 2, Config.browser.maxBackoff);
return;
}
if (response?.status() === 429) {
Logger.warn(Print.rateLimit(link, store, true));
return;
}
if ((response?.status() || 200) >= 400) {
Logger.warn(Print.badStatusCode(link, store, response!.status(), true));
return;
}
if (backoff.count > 0) {
backoff.count--;
backoff.time = Math.max(backoff.time / 2, Config.browser.minBackoff);
}
if (await lookupCardInStock(store, page, link)) {
const givenUrl = link.cartUrl ? link.cartUrl : link.url;
Logger.info(`${Print.inStock(link, store, true)}\n${givenUrl}`);
@@ -77,48 +127,48 @@ async function lookupCard(browser: Browser, store: Store, page: Page, link: Link
link.screenshot = `success-${Date.now()}.png`;
await page.screenshot({path: link.screenshot});
}
return;
}
if (await lookupPageHasCaptcha(store, page)) {
Logger.warn(Print.captcha(link, store, true));
await delay(getSleepTime());
return;
}
if (response && response.status() === 429) {
Logger.warn(Print.rateLimit(link, store, true));
return;
}
Logger.info(Print.outOfStock(link, store, true));
}
async function lookupCardInStock(store: Store, page: Page) {
const stockHandle = await page.$(store.labels.inStock.container);
async function lookupCardInStock(store: Store, page: Page, link: Link) {
const baseOptions: Selector = {
requireVisible: false,
selector: store.labels.container ?? 'body',
type: 'textContent'
};
const visible = await page.evaluate(element => element && element.offsetWidth > 0 && element.offsetHeight > 0, stockHandle);
if (!visible) {
return false;
if (store.labels.inStock) {
const options = {...baseOptions, requireVisible: true, type: 'outerHTML' as const};
if (!await pageIncludesLabels(page, store.labels.inStock, options)) {
Logger.info(Print.outOfStock(link, store, true));
return false;
}
}
const stockContent = await page.evaluate(element => element.outerHTML, stockHandle);
Logger.debug(stockContent);
return includesLabels(stockContent, store.labels.inStock.text);
}
async function lookupPageHasCaptcha(store: Store, page: Page) {
if (!store.labels.captcha) {
return false;
if (store.labels.outOfStock) {
if (await pageIncludesLabels(page, store.labels.outOfStock, baseOptions)) {
Logger.info(Print.outOfStock(link, store, true));
return false;
}
}
const captchaHandle = await page.$(store.labels.captcha.container);
const captchaContent = await page.evaluate(element => element.textContent, captchaHandle);
if (store.labels.bannedSeller) {
if (await pageIncludesLabels(page, store.labels.bannedSeller, baseOptions)) {
Logger.warn(Print.bannedSeller(link, store, true));
return false;
}
}
return includesLabels(captchaContent, store.labels.captcha.text);
if (store.labels.captcha) {
if (await pageIncludesLabels(page, store.labels.captcha, baseOptions)) {
Logger.warn(Print.captcha(link, store, true));
await delay(getSleepTime());
return false;
}
}
return true;
}
export async function tryLookupAndLoop(browser: Browser, store: Store) {
+48
View File
@@ -0,0 +1,48 @@
/**
 * Brand and model extracted from a product name, both lower-cased.
 */
export interface Card {
	brand: string;
	model: string;
}

/**
 * Derives a brand and a condensed model name from a product title.
 *
 * The first word of the cleaned-up title is taken as the brand. Generic
 * terms (vendor/series wording, memory and clock figures, bare numbers) are
 * dropped from the remaining words, and what is left forms the model.
 *
 * @param name The product title as shown by a store.
 * @returns The parsed card, or `null` when the title has no brand or no
 * model words remain after filtering.
 */
export function parseCard(name: string): Card | null {
	// Strip punctuation, then the phrases "graphics card" and "<word> fan",
	// and finally collapse the gaps those removals leave behind.
	const cleaned = name
		.replace(/[^\w ]+/g, '').trim()
		.replace(/\bgraphics card\b/gi, '').trim()
		.replace(/\b\w+ fan\b/gi, '').trim()
		.replace(/\s{2,}/g, ' ');

	const [brand, ...words] = cleaned.split(' ');
	if (!brand) {
		return null;
	}

	const genericTerm = /^(nvidia|geforce|rtx|amp[ae]re|graphics|card|gpu|pci-?e(xpress)?|ray-?tracing|ray|tracing|core|boost)$/i;
	const specTerm = /^(\d+(?:gb?|mhz)?|gb|mhz|g?ddr(\d+x?)?)$/i;

	// Vendors place "oc" anywhere in the title; remember that it was present,
	// leave it out while filtering and append it once at the end.
	let overclocked = false;
	const kept: string[] = [];

	for (const word of words) {
		if (word.toLowerCase() === 'oc') {
			overclocked = true;
			continue;
		}

		if (genericTerm.test(word) || specTerm.test(word)) {
			continue;
		}

		kept.push(word);
	}

	if (overclocked) {
		kept.push('OC');
	}

	if (kept.length === 0) {
		return null;
	}

	// " gaming" is only removed when it follows another word, so a model
	// consisting of just "gaming" is preserved.
	const model = kept.join(' ').toLowerCase().replace(/ gaming\b/g, '').trim();

	return {
		brand: brand.toLowerCase(),
		model
	};
}
+17 -5
View File
@@ -1,13 +1,15 @@
import {Browser, LoadEvent} from 'puppeteer';
export type Element = {
container: string;
container?: string;
text: string[];
};
export type Series = 'test:series' | '3070' | '3080' | '3090';
export type Link = {
brand: 'test:brand' | 'asus' | 'evga' | 'gigabyte' | 'pny' | 'msi' | 'nvidia' | 'zotac';
series: 'test:series' | '3070' | '3080' | '3090';
brand: 'test:brand' | 'asus' | 'evga' | 'gigabyte' | 'inno3d' | 'kfa2' | 'palit' | 'pny' | 'msi' | 'nvidia' | 'zotac';
series: Series;
model: string;
url: string;
cartUrl?: string;
@@ -15,13 +17,23 @@ export type Link = {
screenshot?: string;
};
export type LabelQuery = Element[] | Element | string[];
export type Labels = {
captcha?: Element;
inStock: Element;
bannedSeller?: LabelQuery;
captcha?: LabelQuery;
container?: string;
inStock?: LabelQuery;
outOfStock?: LabelQuery;
};
export type Store = {
disableAdBlocker?: boolean;
links: Link[];
linksBuilder?: {
builder: (docElement: cheerio.Cheerio, series: Series) => Link[];
urls: Array<{series: Series; url: string}>;
};
labels: Labels;
name: string;
setupAction?: (browser: Browser) => void;