mirror of
https://github.com/opelly27/streetmerchant.git
synced 2026-05-20 07:37:39 +00:00
feat: enhanced lookup behaviour (#270)
Co-authored-by: Jef LeCompte <jeffreylec@gmail.com>
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
import {Link, Series, Store} from './model';
|
||||
import {Logger, Print} from '../logger';
|
||||
import {Browser} from 'puppeteer';
|
||||
import cheerio from 'cheerio';
|
||||
import {filterSeries} from './filter';
|
||||
import {usingResponse} from '../util';
|
||||
|
||||
/**
 * Appends newly discovered links to `store.links`, skipping every URL the
 * store already has.
 *
 * - When `links` is empty an error is logged and the store is left untouched.
 * - When no URL is new the function returns without logging.
 * - A URL that occurs several times inside `links` is added only once.
 *
 * @param store Store whose `links` array is extended (reassigned via `concat`).
 * @param links Candidate links discovered for `series`.
 * @param series Series the links belong to; only used in log messages here.
 */
function addNewLinks(store: Store, links: Link[], series: Series): void {
	if (links.length === 0) {
		Logger.error(Print.message('NO STORE LINKS FOUND', series, store, true));

		return;
	}

	// Seeded with the URLs already on the store; every accepted URL is recorded
	// as well so a duplicate later in the same batch is rejected too.
	const knownUrls = new Set(store.links.map(link => link.url));

	const newLinks = links.filter(link => {
		if (knownUrls.has(link.url)) {
			return false;
		}

		knownUrls.add(link.url);

		return true;
	});

	if (newLinks.length === 0) {
		return;
	}

	Logger.info(Print.message(`FOUND ${newLinks.length} STORE LINKS`, series, store, true));
	Logger.debug(JSON.stringify(newLinks, null, 2));

	store.links = store.links.concat(newLinks);
}
|
||||
|
||||
/**
 * Discovers product links for a store that defines a `linksBuilder`.
 *
 * For every `{series, url}` entry in `linksBuilder.urls` that passes
 * `filterSeries`, the URL is requested through `usingResponse`, the response
 * body is parsed with cheerio, and the parsed root is handed to
 * `linksBuilder.builder`. The links it returns are merged into the store by
 * `addNewLinks`. All requests are started before any of them is awaited.
 *
 * Does nothing when the store has no `linksBuilder`. A response without text
 * is logged as an error and skipped.
 *
 * @param store Store to discover links for; its `links` may be extended.
 * @param browser Puppeteer browser forwarded to `usingResponse`.
 */
export async function fetchLinks(store: Store, browser: Browser) {
	// Captured once so the narrowed (non-undefined) value is usable inside the
	// async callbacks below without a non-null assertion.
	const {linksBuilder} = store;

	if (!linksBuilder) {
		return;
	}

	const promises = [];

	for (const {series, url} of linksBuilder.urls) {
		if (!filterSeries(series)) {
			continue;
		}

		Logger.info(Print.message('DETECTING STORE LINKS', series, store, true));

		promises.push(usingResponse(browser, url, async response => {
			const text = await response?.text();

			if (!text) {
				Logger.error(Print.message('NO RESPONSE', series, store, true));

				return;
			}

			const docElement = cheerio.load(text).root();
			const links = linksBuilder.builder(docElement, series);

			addNewLinks(store, links, series);
		}));
	}

	await Promise.all(promises);
}
|
||||
+1
-1
@@ -40,7 +40,7 @@ function filterModel(model: Link['model']): boolean {
|
||||
*
|
||||
* @param series The series of the GPU
|
||||
*/
|
||||
function filterSeries(series: Link['series']): boolean {
|
||||
export function filterSeries(series: Link['series']): boolean {
|
||||
if (Config.store.showOnlySeries.length === 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,85 @@
|
||||
import {Element, LabelQuery} from './model';
|
||||
import {Logger} from '../logger';
|
||||
import {Page} from 'puppeteer';
|
||||
|
||||
/**
 * Describes which element of a page to read and what to read from it.
 */
export type Selector = {
	/** When `true`, an element with zero `offsetWidth` or `offsetHeight` yields no content. */
	requireVisible: boolean;
	/** CSS selector handed to `document.querySelector`. */
	selector: string;
	/** Which property of the matched element is returned as its content. */
	type: 'innerHTML' | 'outerHTML' | 'textContent';
};
|
||||
|
||||
/**
 * Type guard telling an `Element[]` query apart from the other `LabelQuery`
 * shapes.
 *
 * Only the first entry is inspected: a non-empty array whose first item is an
 * object counts as `Element[]`. An empty array is therefore reported as
 * "not an element array".
 *
 * @param query The label query to classify.
 */
function isElementArray(query: LabelQuery): query is Element[] {
	return Array.isArray(query) && query.length > 0 && typeof query[0] === 'object';
}
|
||||
|
||||
/**
 * Normalises any `LabelQuery` shape into a list of elements that all carry an
 * explicit container.
 *
 * - `Element[]`: each element keeps its own container, falling back to
 *   `defaultContainer` when it has none.
 * - `string[]`: treated as the texts of a single element in `defaultContainer`.
 * - single `Element`: wrapped in a one-item list with the same fallback.
 *
 * @param query The label query to normalise.
 * @param defaultContainer Container used where the query does not name one.
 * @returns Elements with both `container` and `text` set.
 */
function getQueryAsElementArray(query: LabelQuery, defaultContainer: string): Array<Required<Element>> {
	if (isElementArray(query)) {
		return query.map(element => ({
			container: element.container ?? defaultContainer,
			text: element.text
		}));
	}

	// Any other array (including an empty one) is a plain list of texts.
	if (Array.isArray(query)) {
		return [{
			container: defaultContainer,
			text: query
		}];
	}

	return [{
		container: query.container ?? defaultContainer,
		text: query.text
	}];
}
|
||||
|
||||
/**
 * Checks whether the page contains the labels described by `query`.
 *
 * The query is first normalised with `getQueryAsElementArray`, using
 * `options.selector` as the default container. For each resulting element the
 * container's contents are extracted with the remaining `options` and passed
 * to `includesLabels` together with the element's texts. The elements are
 * checked concurrently.
 *
 * @param page Puppeteer page to inspect.
 * @param query Labels to look for, in any `LabelQuery` shape.
 * @param options Extraction options; `selector` is the fallback container.
 * @returns `true` when at least one element's check yields `true`. A container
 * that yields no contents counts as `false`.
 */
export async function pageIncludesLabels(page: Page, query: LabelQuery, options: Selector) {
	const elementQueries = getQueryAsElementArray(query, options.selector);

	const resolved = await Promise.all(elementQueries.map(async elementQuery => {
		const selector = {...options, selector: elementQuery.container};
		const contents = await extractPageContents(page, selector);

		// Covers both "nothing extracted" (null) and empty content.
		if (!contents) {
			return false;
		}

		Logger.debug(contents);

		return includesLabels(contents, elementQuery.text);
	}));

	return resolved.includes(true);
}
|
||||
|
||||
/**
 * Reads the content of the first element matching `selector.selector` from
 * the page.
 *
 * @param page Puppeteer page to read from.
 * @param selector What to select and which property to return.
 * @returns The element's `innerHTML`, `outerHTML` or `textContent` depending
 * on `selector.type`; `null` when no element matches, or when
 * `selector.requireVisible` is set and the element has zero `offsetWidth` or
 * `offsetHeight`. An unrecognised `selector.type` yields the literal string
 * `'Error: selector.type is unknown'` instead of throwing.
 */
export async function extractPageContents(page: Page, selector: Selector): Promise<string | null> {
	// The callback is given to `page.evaluate`, with `selector` passed along as
	// its `options` argument; `document` below is the page's document.
	const content = await page.evaluate((options: Selector) => {
		// eslint-disable-next-line no-undef
		const element: globalThis.HTMLElement | null = document.querySelector(options.selector);

		if (!element) {
			return null;
		}

		// Zero layout width or height is treated as "not visible".
		if (options.requireVisible && !(element.offsetWidth > 0 && element.offsetHeight > 0)) {
			return null;
		}

		switch (options.type) {
			case 'innerHTML':
				return element.innerHTML;
			case 'outerHTML':
				return element.outerHTML;
			case 'textContent':
				return element.textContent;
			default:
				return 'Error: selector.type is unknown';
		}
	}, selector);

	return content;
}
|
||||
|
||||
/**
|
||||
* Checks if DOM has any related text.
|
||||
*
|
||||
|
||||
+85
-35
@@ -1,15 +1,23 @@
|
||||
import {Browser, Page, Response} from 'puppeteer';
|
||||
import {Link, Store} from './model';
|
||||
import {Logger, Print} from '../logger';
|
||||
import {Selector, pageIncludesLabels} from './includes-labels';
|
||||
import {closePage, delay, getSleepTime} from '../util';
|
||||
import {Config} from '../config';
|
||||
import {disableBlockerInPage} from '../adblocker';
|
||||
import {filterStoreLink} from './filter';
|
||||
import {includesLabels} from './includes-labels';
|
||||
import open from 'open';
|
||||
import {sendNotification} from '../notification';
|
||||
|
||||
/**
 * Back-off state kept per store.
 */
type Backoff = {
	/** Raised by one on a 403 response, lowered by one (while positive) after an acceptable status. */
	count: number;
	/**
	 * Delay handed to `delay` after a 403 response. Doubled after each 403 up to
	 * `Config.browser.maxBackoff` and halved again down to
	 * `Config.browser.minBackoff`. Presumably milliseconds - confirm against `delay`.
	 */
	time: number;
};

// NOTE(review): no use of this map is visible in this excerpt; presumably it
// tracks stock state per link - confirm against the rest of the file.
const inStock: Record<string, boolean> = {};

/** Back-off state per store, keyed by `store.name`. */
const storeBackoff: Record<string, Backoff> = {};
|
||||
|
||||
/**
|
||||
* Responsible for looking up information about a each product within
|
||||
* a `Store`. It's important that we ignore `no-await-in-loop` here
|
||||
@@ -34,6 +42,14 @@ async function lookup(browser: Browser, store: Store) {
|
||||
page.setDefaultNavigationTimeout(Config.page.navigationTimeout);
|
||||
await page.setUserAgent(Config.page.userAgent);
|
||||
|
||||
if (store.disableAdBlocker) {
|
||||
try {
|
||||
await disableBlockerInPage(page);
|
||||
} catch (error) {
|
||||
Logger.error(error);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await lookupCard(browser, store, page, link);
|
||||
} catch (error) {
|
||||
@@ -49,7 +65,41 @@ async function lookupCard(browser: Browser, store: Store, page: Page, link: Link
|
||||
const givenWaitFor = store.waitUntil ? store.waitUntil : 'networkidle0';
|
||||
const response: Response | null = await page.goto(link.url, {waitUntil: givenWaitFor});
|
||||
|
||||
if (await lookupCardInStock(store, page)) {
|
||||
if (!response) {
|
||||
Logger.debug(Print.noResponse(link, store, true));
|
||||
}
|
||||
|
||||
let backoff = storeBackoff[store.name];
|
||||
|
||||
if (!backoff) {
|
||||
backoff = {count: 0, time: Config.browser.minBackoff};
|
||||
storeBackoff[store.name] = backoff;
|
||||
}
|
||||
|
||||
if (response?.status() === 403) {
|
||||
Logger.warn(Print.backoff(link, store, backoff.time, true));
|
||||
await delay(backoff.time);
|
||||
backoff.count++;
|
||||
backoff.time = Math.min(backoff.time * 2, Config.browser.maxBackoff);
|
||||
return;
|
||||
}
|
||||
|
||||
if (response?.status() === 429) {
|
||||
Logger.warn(Print.rateLimit(link, store, true));
|
||||
return;
|
||||
}
|
||||
|
||||
if ((response?.status() || 200) >= 400) {
|
||||
Logger.warn(Print.badStatusCode(link, store, response!.status(), true));
|
||||
return;
|
||||
}
|
||||
|
||||
if (backoff.count > 0) {
|
||||
backoff.count--;
|
||||
backoff.time = Math.max(backoff.time / 2, Config.browser.minBackoff);
|
||||
}
|
||||
|
||||
if (await lookupCardInStock(store, page, link)) {
|
||||
const givenUrl = link.cartUrl ? link.cartUrl : link.url;
|
||||
Logger.info(`${Print.inStock(link, store, true)}\n${givenUrl}`);
|
||||
|
||||
@@ -77,48 +127,48 @@ async function lookupCard(browser: Browser, store: Store, page: Page, link: Link
|
||||
link.screenshot = `success-${Date.now()}.png`;
|
||||
await page.screenshot({path: link.screenshot});
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (await lookupPageHasCaptcha(store, page)) {
|
||||
Logger.warn(Print.captcha(link, store, true));
|
||||
await delay(getSleepTime());
|
||||
return;
|
||||
}
|
||||
|
||||
if (response && response.status() === 429) {
|
||||
Logger.warn(Print.rateLimit(link, store, true));
|
||||
return;
|
||||
}
|
||||
|
||||
Logger.info(Print.outOfStock(link, store, true));
|
||||
}
|
||||
|
||||
async function lookupCardInStock(store: Store, page: Page) {
|
||||
const stockHandle = await page.$(store.labels.inStock.container);
|
||||
async function lookupCardInStock(store: Store, page: Page, link: Link) {
|
||||
const baseOptions: Selector = {
|
||||
requireVisible: false,
|
||||
selector: store.labels.container ?? 'body',
|
||||
type: 'textContent'
|
||||
};
|
||||
|
||||
const visible = await page.evaluate(element => element && element.offsetWidth > 0 && element.offsetHeight > 0, stockHandle);
|
||||
if (!visible) {
|
||||
return false;
|
||||
if (store.labels.inStock) {
|
||||
const options = {...baseOptions, requireVisible: true, type: 'outerHTML' as const};
|
||||
|
||||
if (!await pageIncludesLabels(page, store.labels.inStock, options)) {
|
||||
Logger.info(Print.outOfStock(link, store, true));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const stockContent = await page.evaluate(element => element.outerHTML, stockHandle);
|
||||
|
||||
Logger.debug(stockContent);
|
||||
|
||||
return includesLabels(stockContent, store.labels.inStock.text);
|
||||
}
|
||||
|
||||
async function lookupPageHasCaptcha(store: Store, page: Page) {
|
||||
if (!store.labels.captcha) {
|
||||
return false;
|
||||
if (store.labels.outOfStock) {
|
||||
if (await pageIncludesLabels(page, store.labels.outOfStock, baseOptions)) {
|
||||
Logger.info(Print.outOfStock(link, store, true));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const captchaHandle = await page.$(store.labels.captcha.container);
|
||||
const captchaContent = await page.evaluate(element => element.textContent, captchaHandle);
|
||||
if (store.labels.bannedSeller) {
|
||||
if (await pageIncludesLabels(page, store.labels.bannedSeller, baseOptions)) {
|
||||
Logger.warn(Print.bannedSeller(link, store, true));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return includesLabels(captchaContent, store.labels.captcha.text);
|
||||
if (store.labels.captcha) {
|
||||
if (await pageIncludesLabels(page, store.labels.captcha, baseOptions)) {
|
||||
Logger.warn(Print.captcha(link, store, true));
|
||||
await delay(getSleepTime());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
export async function tryLookupAndLoop(browser: Browser, store: Store) {
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
/**
 * Brand and model extracted from a product name by `parseCard`.
 */
export interface Card {
	/** First word of the cleaned product name, lower-cased. */
	brand: string;
	/** Remaining significant words, lower-cased and joined with single spaces. */
	model: string;
}

/**
 * Extracts a brand and a model from a free-form graphics card product name.
 *
 * The name is cleaned (whitespace normalised, punctuation removed, the phrases
 * "graphics card" and "<word> fan" dropped). The first remaining word is the
 * brand. From the rest, generic GPU terms (nvidia, geforce, rtx, ...) and
 * spec tokens (plain numbers, memory sizes, clock speeds, gddr types) are
 * discarded. A standalone "oc" is moved to the end of the model, and any
 * " gaming" that follows another word is removed from the final model.
 *
 * @param name Product name as shown by a store.
 * @returns The parsed card, or `null` when the name contains no brand or no
 * model words are left after filtering.
 */
export function parseCard(name: string): Card | null {
	const cleaned = name
		// Collapse every whitespace run (tabs, newlines, non-breaking spaces) to
		// one space first. The punctuation filter below only keeps word
		// characters and plain spaces, so it would otherwise delete such
		// whitespace and fuse the words around it.
		.replace(/\s+/g, ' ')
		.replace(/[^\w ]+/g, '').trim()
		.replace(/\bgraphics card\b/gi, '').trim()
		.replace(/\b\w+ fan\b/gi, '').trim()
		.replace(/\s{2,}/g, ' ');

	const [brand, ...words] = cleaned.split(' ');

	// Splitting an empty string yields [''], so an empty brand means no input.
	if (!brand) {
		return null;
	}

	// Hyphens are already stripped above; the optional "-" in these patterns
	// is kept from the original expressions and is harmless.
	const genericTerm = /^(nvidia|geforce|rtx|amp[ae]re|graphics|card|gpu|pci-?e(xpress)?|ray-?tracing|ray|tracing|core|boost)$/i;
	const specToken = /^(\d+(?:gb?|mhz)?|gb|mhz|g?ddr(\d+x?)?)$/i;

	// Some vendors put "oc" at the beginning of the product name: remember
	// whether it occurs, drop it while filtering and append it at the end.
	const isOC = words.some(word => word.toLowerCase() === 'oc');

	const model = words.filter(word =>
		word.toLowerCase() !== 'oc' &&
		!genericTerm.test(word) &&
		!specToken.test(word)
	);

	if (isOC) {
		model.push('OC');
	}

	if (model.length === 0) {
		return null;
	}

	return {
		brand: brand.toLowerCase(),
		model: model.join(' ').toLowerCase().replace(/ gaming\b/g, '').trim()
	};
}
|
||||
@@ -1,13 +1,15 @@
|
||||
import {Browser, LoadEvent} from 'puppeteer';
|
||||
|
||||
export type Element = {
|
||||
container: string;
|
||||
container?: string;
|
||||
text: string[];
|
||||
};
|
||||
|
||||
export type Series = 'test:series' | '3070' | '3080' | '3090';
|
||||
|
||||
export type Link = {
|
||||
brand: 'test:brand' | 'asus' | 'evga' | 'gigabyte' | 'pny' | 'msi' | 'nvidia' | 'zotac';
|
||||
series: 'test:series' | '3070' | '3080' | '3090';
|
||||
brand: 'test:brand' | 'asus' | 'evga' | 'gigabyte' | 'inno3d' | 'kfa2' | 'palit' | 'pny' | 'msi' | 'nvidia' | 'zotac';
|
||||
series: Series;
|
||||
model: string;
|
||||
url: string;
|
||||
cartUrl?: string;
|
||||
@@ -15,13 +17,23 @@ export type Link = {
|
||||
screenshot?: string;
|
||||
};
|
||||
|
||||
export type LabelQuery = Element[] | Element | string[];
|
||||
|
||||
export type Labels = {
|
||||
captcha?: Element;
|
||||
inStock: Element;
|
||||
bannedSeller?: LabelQuery;
|
||||
captcha?: LabelQuery;
|
||||
container?: string;
|
||||
inStock?: LabelQuery;
|
||||
outOfStock?: LabelQuery;
|
||||
};
|
||||
|
||||
export type Store = {
|
||||
disableAdBlocker?: boolean;
|
||||
links: Link[];
|
||||
linksBuilder?: {
|
||||
builder: (docElement: cheerio.Cheerio, series: Series) => Link[];
|
||||
urls: Array<{series: Series; url: string}>;
|
||||
};
|
||||
labels: Labels;
|
||||
name: string;
|
||||
setupAction?: (browser: Browser) => void;
|
||||
|
||||
Reference in New Issue
Block a user