From 5d71e162cf57561b90ee6568880405bf99ca1738 Mon Sep 17 00:00:00 2001 From: Dmitriusan Date: Tue, 5 Dec 2023 21:16:04 +0200 Subject: [PATCH] Implement logic for hawk settings: subsetting of product variants, enabling/disabling AI features, sampling --- .../hawk/configuration/HawkProperties.java | 29 ++++++++++++++++--- .../extractors/AiWheelCountExtractor.java | 9 ++++-- .../service/scrape/ProductCatalogService.java | 12 +++++++- .../ScrapeRunSummaryPrintingService.java | 19 ++++++++++++ .../scrape/ScrapeTargetProviderService.java | 7 ++--- .../service/scrape/ScraperService.java | 8 ++++- hawk/src/main/resources/application.yaml | 5 +++- 7 files changed, 75 insertions(+), 14 deletions(-) diff --git a/hawk/src/main/java/io/irw/hawk/configuration/HawkProperties.java b/hawk/src/main/java/io/irw/hawk/configuration/HawkProperties.java index deb9b67..ec9b307 100644 --- a/hawk/src/main/java/io/irw/hawk/configuration/HawkProperties.java +++ b/hawk/src/main/java/io/irw/hawk/configuration/HawkProperties.java @@ -12,17 +12,38 @@ @Data public class HawkProperties { - private boolean ai; - + private Ai ai; private Sampling sampling; - private Subset subset; + public boolean someAiFeaturesAreDisabled() { + return !ai.isEnabled() + || !ai.getFeatures().isPieceCountMatching(); + } + + public boolean samplingIsEnabled() { + return sampling.isEnabled(); + } + + public boolean subsetIsEnabled() { + return subset.isEnabled(); + } + + @Data + public static class Ai { + private boolean enabled; + private Features features; + + @Data + public static class Features { + private boolean pieceCountMatching; + } + } + @Data public static class Sampling { private boolean enabled; private double rate; - } @Data diff --git a/hawk/src/main/java/io/irw/hawk/scraper/service/processors/skates/parts/extractors/AiWheelCountExtractor.java b/hawk/src/main/java/io/irw/hawk/scraper/service/processors/skates/parts/extractors/AiWheelCountExtractor.java index 62f6a31..f52c666 100644 ---
a/hawk/src/main/java/io/irw/hawk/scraper/service/processors/skates/parts/extractors/AiWheelCountExtractor.java +++ b/hawk/src/main/java/io/irw/hawk/scraper/service/processors/skates/parts/extractors/AiWheelCountExtractor.java @@ -10,6 +10,7 @@ import com.theokanning.openai.completion.chat.ChatMessage; import com.theokanning.openai.completion.chat.ChatMessageRole; import com.theokanning.openai.service.FunctionExecutor; +import io.irw.hawk.configuration.HawkProperties; import io.irw.hawk.dto.ebay.EbayHighlightDto; import io.irw.hawk.scraper.ai.LlmQuery; import io.irw.hawk.scraper.model.ProcessingPipelineStep; @@ -38,6 +39,7 @@ public class AiWheelCountExtractor extends WheelCountExtractor { How many inline skate wheels are sold in this Ebay listing\s according to the listing description provided by the user?"""; LlmQueryService llmQueryService; + HawkProperties hawkProperties; @Override public List> dependsOn() { @@ -46,9 +48,10 @@ public List> dependsOn() { @Override public boolean isApplicableTo(EbayHighlightDto highlightDto) { - return super.isApplicableTo(highlightDto) - && highlightDto.getEbayFinding().getNumberOfPieces().isEmpty() - && highlightDto.getNewItem(); + return hawkProperties.getAi().isEnabled() && hawkProperties.getAi().getFeatures().isPieceCountMatching() + && super.isApplicableTo(highlightDto) + && highlightDto.getEbayFinding().getNumberOfPieces().isEmpty() + && highlightDto.getNewItem(); } @Override diff --git a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ProductCatalogService.java b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ProductCatalogService.java index 22e7426..bd62099 100644 --- a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ProductCatalogService.java +++ b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ProductCatalogService.java @@ -1,5 +1,6 @@ package io.irw.hawk.scraper.service.scrape; +import io.irw.hawk.configuration.HawkProperties; import io.irw.hawk.dto.merchandise.ProductVariantEnum; import 
java.util.List; import java.util.concurrent.atomic.AtomicLong; @@ -15,11 +16,20 @@ @Slf4j public class ProductCatalogService { + HawkProperties hawkProperties; + AtomicLong productVariantId = new AtomicLong(); AtomicLong productQualifierId = new AtomicLong(); public List getProducts() { - return List.of(ProductVariantEnum.values()); + List candidateValues = List.of(ProductVariantEnum.values()); + if (hawkProperties.getSubset().isEnabled()) { + return candidateValues.stream() + .filter(pv -> hawkProperties.getSubset().getProducts().getVariants().contains(pv)) + .toList(); + } else { + return candidateValues; + } } } diff --git a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeRunSummaryPrintingService.java b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeRunSummaryPrintingService.java index e82d048..7f29a74 100644 --- a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeRunSummaryPrintingService.java +++ b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeRunSummaryPrintingService.java @@ -1,5 +1,6 @@ package io.irw.hawk.scraper.service.scrape; +import io.irw.hawk.configuration.HawkProperties; import io.irw.hawk.dto.ebay.EbayHighlightDto; import io.irw.hawk.dto.merchandise.HawkScrapeRunDto; import io.irw.hawk.scraper.service.domain.EbayFindingService; @@ -22,6 +23,7 @@ public class ScrapeRunSummaryPrintingService { EbayFindingService ebayFindingService; EbayHighlightService ebayHighlightService; + HawkProperties hawkProperties; public void printScrapeRunSummary(HawkScrapeRunDto hawkScrapeRunDto) { ScrapeRunSummaryDto runSummary = ebayHighlightService.getScrapeRunSummary(hawkScrapeRunDto); @@ -31,6 +33,9 @@ public void printScrapeRunSummary(HawkScrapeRunDto hawkScrapeRunDto) { log.info(StringUtils.repeat("=", 80)); log.info("Scrape run summary for Run ID {} ({})", hawkScrapeRunDto.getId(), hawkScrapeRunDto.getProductVariant()); log.info("Total: {} items processed", runSummary.getTotalHighligts()); + + logWarnings(); + 
runSummary.getVerdictCounts() .forEach((key, value) -> log.info(" {} items with verdict {}", value, key)); @@ -67,4 +72,18 @@ private void printAucHighlight(EbayHighlightDto highlightDto) { log.info(StringUtils.repeat("-", 80)); } + private void logWarnings() { + if (hawkProperties.someAiFeaturesAreDisabled()) { + log.warn(" ATTENTION: some AI features are disabled"); + } + + if (hawkProperties.samplingIsEnabled()) { + log.warn(" ATTENTION: sampling is enabled, not all entities are processed"); + } + + if (hawkProperties.subsetIsEnabled()) { + log.warn(" ATTENTION: subsetting is enabled, not all product variants are processed"); + } + } + } diff --git a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeTargetProviderService.java b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeTargetProviderService.java index 2ca9cc4..ca2da80 100644 --- a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeTargetProviderService.java +++ b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScrapeTargetProviderService.java @@ -2,24 +2,23 @@ import io.irw.hawk.configuration.HawkProperties; import io.irw.hawk.dto.merchandise.ProductVariantEnum; -import java.util.Optional; -import java.util.Queue; import lombok.AccessLevel; import lombok.experimental.FieldDefaults; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; +import java.util.Optional; +import java.util.Queue; + @Service @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) @Slf4j public class ScrapeTargetProviderService { ProductCatalogService productCatalogService; Queue searchTargets; - HawkProperties hawkProperties; public ScrapeTargetProviderService(ProductCatalogService productCatalogService, HawkProperties hawkProperties) { this.productCatalogService = productCatalogService; - this.hawkProperties = hawkProperties; searchTargets = new java.util.LinkedList<>(productCatalogService.getProducts()); } diff --git 
a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScraperService.java b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScraperService.java index 45ab749..48a5f4c 100644 --- a/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScraperService.java +++ b/hawk/src/main/java/io/irw/hawk/scraper/service/scrape/ScraperService.java @@ -5,6 +5,7 @@ import com.ebay.buy.browse.api.ItemSummaryApi; import com.ebay.buy.browse.model.ItemSummary; import com.ebay.buy.browse.model.SearchPagedCollection; +import io.irw.hawk.configuration.HawkProperties; import io.irw.hawk.dto.ebay.EbayFindingDto; import io.irw.hawk.dto.ebay.EbayHighlightDto; import io.irw.hawk.dto.ebay.EbaySellerDto; @@ -29,6 +30,7 @@ import java.time.Instant; import java.util.Comparator; import java.util.List; +import java.util.Random; import java.util.concurrent.atomic.AtomicBoolean; import lombok.AccessLevel; import lombok.RequiredArgsConstructor; @@ -60,6 +62,7 @@ public class ScraperService { EbayHighlightService ebayHighlightService; ScrapeRunSummaryPrintingService scrapeRunSummaryPrintingService; + HawkProperties hawkProperties; private AtomicBoolean isScraping = new AtomicBoolean(false); @@ -83,8 +86,11 @@ private void scrapePV(ProductVariantEnum targetProductVariant) { continue; } + Random random = new Random(); for (ItemSummary itemSummary : result.getItemSummaries()) { - processItemSummary(targetProductVariant, itemSummary, hawkScrapeRunDto); + if (!hawkProperties.getSampling().isEnabled() || random.nextDouble() < hawkProperties.getSampling().getRate()) { + processItemSummary(targetProductVariant, itemSummary, hawkScrapeRunDto); + } } } scrapeRunSummaryPrintingService.printScrapeRunSummary(hawkScrapeRunDto); diff --git a/hawk/src/main/resources/application.yaml b/hawk/src/main/resources/application.yaml index dd3c939..2aa2495 100644 --- a/hawk/src/main/resources/application.yaml +++ b/hawk/src/main/resources/application.yaml @@ -1,5 +1,8 @@ hawk: - ai: true + ai: + enabled: true + features: 
+ piece_count_matching: true sampling: enabled: false rate: 0.1