Skip to content

Commit

Permalink
implement logic for hawk settings - subsetting of product variants, a…
Browse files Browse the repository at this point in the history
…i features enabled/disabled, sampling
  • Loading branch information
Dmitriusan committed Dec 5, 2023
1 parent 46d13e5 commit 5d71e16
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 14 deletions.
29 changes: 25 additions & 4 deletions hawk/src/main/java/io/irw/hawk/configuration/HawkProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,38 @@
@Data
public class HawkProperties {

private boolean ai;

private Ai ai;
private Sampling sampling;

private Subset subset;

/**
 * Reports whether at least one AI-backed feature is switched off.
 *
 * <p>A missing {@code ai} section or a missing {@code ai.features} section is
 * treated as "disabled" rather than failing with a {@link NullPointerException},
 * so callers that only want to print a warning never crash on partial configuration.
 *
 * @return {@code true} if AI is globally disabled, not configured, or the
 *         piece-count-matching feature is off; {@code false} otherwise
 */
public boolean someAiFeaturesAreDisabled() {
    if (ai == null || !ai.isEnabled()) {
        return true;
    }
    Ai.Features features = ai.getFeatures();
    return features == null || !features.isPieceCountMatching();
}

/**
 * Reports whether sampling is turned on.
 *
 * @return {@code true} only when a {@code sampling} section is present and enabled;
 *         an absent section counts as disabled instead of raising a
 *         {@link NullPointerException}
 */
public boolean samplingIsEnabled() {
    return sampling != null && sampling.isEnabled();
}

/**
 * Reports whether product-variant subsetting is turned on.
 *
 * @return {@code true} only when a {@code subset} section is present and enabled;
 *         an absent section counts as disabled instead of raising a
 *         {@link NullPointerException}
 */
public boolean subsetIsEnabled() {
    return subset != null && subset.isEnabled();
}

/**
 * AI-related settings: a global switch plus per-feature toggles.
 */
@Data
public static class Ai {
    /** Global switch for all AI functionality. */
    private boolean enabled;

    /**
     * Per-feature toggles. Initialized eagerly so that
     * {@code getFeatures().isPieceCountMatching()} is safe to call even when the
     * {@code features} section is left out of the configuration; every toggle is
     * then {@code false}.
     */
    private Features features = new Features();

    /**
     * Individual AI feature toggles.
     */
    @Data
    public static class Features {
        /** Enables the AI-based piece-count matching step. */
        private boolean pieceCountMatching;
    }
}

/**
 * Sampling settings: an on/off switch and the sampling rate.
 */
@Data
public static class Sampling {
// When false, the configured rate is not consulted by the helper samplingIsEnabled().
private boolean enabled;
// NOTE(review): presumably a probability in [0.0, 1.0] (application.yaml uses 0.1);
// no range validation is performed here - confirm against the consumer.
private double rate;

}

@Data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.theokanning.openai.completion.chat.ChatMessage;
import com.theokanning.openai.completion.chat.ChatMessageRole;
import com.theokanning.openai.service.FunctionExecutor;
import io.irw.hawk.configuration.HawkProperties;
import io.irw.hawk.dto.ebay.EbayHighlightDto;
import io.irw.hawk.scraper.ai.LlmQuery;
import io.irw.hawk.scraper.model.ProcessingPipelineStep;
Expand Down Expand Up @@ -38,6 +39,7 @@ public class AiWheelCountExtractor extends WheelCountExtractor {
How many inline skate wheels are sold in this Ebay listing\s
according to the listing description provided by the user?""";
LlmQueryService llmQueryService;
HawkProperties hawkProperties;

@Override
public List<Class<? extends ProcessingPipelineStep>> dependsOn() {
Expand All @@ -46,9 +48,10 @@ public List<Class<? extends ProcessingPipelineStep>> dependsOn() {

/**
 * Decides whether the AI wheel-count extraction should run for the given highlight.
 *
 * <p>The step applies only when AI is enabled, the piece-count-matching feature is
 * enabled, the parent extractor considers the highlight applicable, the finding has
 * no piece count yet, and the highlight is flagged as a new item.
 *
 * @param highlightDto the highlight being evaluated
 * @return {@code true} if this step should process the highlight
 */
@Override
public boolean isApplicableTo(EbayHighlightDto highlightDto) {
    // Configuration gates come first so that a disabled feature short-circuits
    // before the highlight itself is inspected.
    return hawkProperties.getAi().isEnabled()
        && hawkProperties.getAi().getFeatures().isPieceCountMatching()
        && super.isApplicableTo(highlightDto)
        && highlightDto.getEbayFinding().getNumberOfPieces().isEmpty()
        && highlightDto.getNewItem();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.irw.hawk.scraper.service.scrape;

import io.irw.hawk.configuration.HawkProperties;
import io.irw.hawk.dto.merchandise.ProductVariantEnum;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
Expand All @@ -15,11 +16,20 @@
@Slf4j
public class ProductCatalogService {

HawkProperties hawkProperties;

AtomicLong productVariantId = new AtomicLong();
AtomicLong productQualifierId = new AtomicLong();

/**
 * Returns the product variants that should be processed.
 *
 * <p>When subsetting is disabled, every {@link ProductVariantEnum} constant is
 * returned. When it is enabled, only the constants contained in the configured
 * subset are returned, in enum declaration order.
 *
 * @return an unmodifiable list of product variants to process
 */
public List<ProductVariantEnum> getProducts() {
    List<ProductVariantEnum> candidateValues = List.of(ProductVariantEnum.values());
    if (!hawkProperties.getSubset().isEnabled()) {
        return candidateValues;
    }

    // Resolve the configured subset once instead of once per candidate.
    var allowedVariants = hawkProperties.getSubset().getProducts().getVariants();
    return candidateValues.stream()
        .filter(allowedVariants::contains)
        .toList();
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.irw.hawk.scraper.service.scrape;

import io.irw.hawk.configuration.HawkProperties;
import io.irw.hawk.dto.ebay.EbayHighlightDto;
import io.irw.hawk.dto.merchandise.HawkScrapeRunDto;
import io.irw.hawk.scraper.service.domain.EbayFindingService;
Expand All @@ -22,6 +23,7 @@ public class ScrapeRunSummaryPrintingService {

EbayFindingService ebayFindingService;
EbayHighlightService ebayHighlightService;
HawkProperties hawkProperties;

public void printScrapeRunSummary(HawkScrapeRunDto hawkScrapeRunDto) {
ScrapeRunSummaryDto runSummary = ebayHighlightService.getScrapeRunSummary(hawkScrapeRunDto);
Expand All @@ -31,6 +33,9 @@ public void printScrapeRunSummary(HawkScrapeRunDto hawkScrapeRunDto) {
log.info(StringUtils.repeat("=", 80));
log.info("Scrape run summary for Run ID {} ({})", hawkScrapeRunDto.getId(), hawkScrapeRunDto.getProductVariant());
log.info("Total: {} items processed", runSummary.getTotalHighligts());

logWarnings();

runSummary.getVerdictCounts()
.forEach((key, value) -> log.info(" {} items with verdict {}", value, key));

Expand Down Expand Up @@ -67,4 +72,18 @@ private void printAucHighlight(EbayHighlightDto highlightDto) {
log.info(StringUtils.repeat("-", 80));
}

/**
 * Emits a warning line for every configuration setting that makes the run
 * summary incomplete: disabled AI features, sampling, and subsetting.
 * Checks are evaluated and logged one after another, in that order.
 */
private void logWarnings() {
    warnIf(hawkProperties.someAiFeaturesAreDisabled(),
        " ATTENTION: some AI features are disabled");
    warnIf(hawkProperties.samplingIsEnabled(),
        " ATTENTION: sampling is enabled, not all entities are processed");
    warnIf(hawkProperties.subsetIsEnabled(),
        " ATTENTION: subsetting is enabled, not all product variants are processed");
}

/** Logs {@code message} at WARN level when {@code condition} holds. */
private void warnIf(boolean condition, String message) {
    if (condition) {
        log.warn(message);
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,23 @@

import io.irw.hawk.configuration.HawkProperties;
import io.irw.hawk.dto.merchandise.ProductVariantEnum;
import java.util.Optional;
import java.util.Queue;
import lombok.AccessLevel;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.util.Optional;
import java.util.Queue;

@Service
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
@Slf4j
public class ScrapeTargetProviderService {
ProductCatalogService productCatalogService;
Queue<ProductVariantEnum> searchTargets;
HawkProperties hawkProperties;

/**
 * Creates the provider and seeds the queue of search targets with the product
 * variants currently returned by the catalog.
 *
 * @param productCatalogService source of the product variants to scrape
 * @param hawkProperties application settings
 */
public ScrapeTargetProviderService(ProductCatalogService productCatalogService, HawkProperties hawkProperties) {
    this.productCatalogService = productCatalogService;
    this.hawkProperties = hawkProperties;
    // ArrayDeque is the standard FIFO queue implementation; elements are enum
    // constants, so its no-null restriction is not a concern.
    this.searchTargets = new java.util.ArrayDeque<>(productCatalogService.getProducts());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.ebay.buy.browse.api.ItemSummaryApi;
import com.ebay.buy.browse.model.ItemSummary;
import com.ebay.buy.browse.model.SearchPagedCollection;
import io.irw.hawk.configuration.HawkProperties;
import io.irw.hawk.dto.ebay.EbayFindingDto;
import io.irw.hawk.dto.ebay.EbayHighlightDto;
import io.irw.hawk.dto.ebay.EbaySellerDto;
Expand All @@ -29,6 +30,7 @@
import java.time.Instant;
import java.util.Comparator;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
Expand Down Expand Up @@ -60,6 +62,7 @@ public class ScraperService {
EbayHighlightService ebayHighlightService;

ScrapeRunSummaryPrintingService scrapeRunSummaryPrintingService;
HawkProperties hawkProperties;

private AtomicBoolean isScraping = new AtomicBoolean(false);

Expand All @@ -83,8 +86,11 @@ private void scrapePV(ProductVariantEnum targetProductVariant) {
continue;
}

Random random = new Random();
for (ItemSummary itemSummary : result.getItemSummaries()) {
processItemSummary(targetProductVariant, itemSummary, hawkScrapeRunDto);
if (!hawkProperties.getSampling().isEnabled() || random.nextDouble() < hawkProperties.getSampling().getRate()) {
processItemSummary(targetProductVariant, itemSummary, hawkScrapeRunDto);
}
}
}
scrapeRunSummaryPrintingService.printScrapeRunSummary(hawkScrapeRunDto);
Expand Down
5 changes: 4 additions & 1 deletion hawk/src/main/resources/application.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
hawk:
ai: true
ai:
enabled: true
features:
piece_count_matching: true
sampling:
enabled: false
rate: 0.1
Expand Down

0 comments on commit 5d71e16

Please sign in to comment.