Skip to content

Commit

Permalink
adding draft llm-based extractors (wheel count)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitriusan committed Nov 26, 2023
1 parent caecad1 commit b243fe5
Show file tree
Hide file tree
Showing 14 changed files with 177 additions and 19 deletions.
2 changes: 2 additions & 0 deletions hawk/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ dependencies {
implementation("org.openapitools:jackson-databind-nullable:0.2.6")
implementation("com.ebay.auth:ebay-oauth-java-client:1.1.8")
implementation("org.jgrapht:jgrapht-core:1.5.2")

implementation("com.theokanning.openai-gpt3-java:service:0.18.2")
}

dependencyManagement {
Expand Down
31 changes: 31 additions & 0 deletions hawk/src/main/java/io/irw/hawk/scraper/ai/LlmQuery.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package io.irw.hawk.scraper.ai;

import com.theokanning.openai.completion.chat.ChatMessage;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Builder.Default;
import lombok.Getter;
import lombok.Value;

/**
 * Immutable description of a single request to an LLM: the ordered chat messages to send
 * and the model that should answer them.
 *
 * <p>Instances are created through the Lombok-generated builder; when no model is given
 * the builder falls back to {@link OpenAiModel#GPT_3_5_TURBO}.
 */
@Value
@Builder
public class LlmQuery {

  /** Chat messages to send, in the order they should be presented to the model. */
  List<ChatMessage> messages;

  /** Model that should serve the query. */
  @Default
  OpenAiModel model = OpenAiModel.GPT_3_5_TURBO;

  /** Models selectable for a query, each paired with the identifier string sent to the API. */
  @Getter
  @AllArgsConstructor
  public enum OpenAiModel {
    GPT_3_5_TURBO("gpt-3.5-turbo"),
    GPT_3_5_TURBO_16K("gpt-3.5-turbo-16k"),
    GPT_4_TURBO("gpt-4-1106-preview");

    /**
     * Model identifier string. Exposed through {@code getName()}; note that this is distinct
     * from the built-in {@link Enum#name()}, which returns the constant name.
     */
    // final: enum constants are shared singletons and must not carry mutable state.
    private final String name;

  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import static java.util.stream.Collectors.toMap;

import io.irw.hawk.dto.ebay.EbayHighlightDto;
import io.irw.hawk.dto.merchandise.GroupEnum;
import io.irw.hawk.dto.merchandise.MerchandiseVerdictType;
import io.irw.hawk.dto.merchandise.ProductVariantEnum;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -86,4 +88,14 @@ default void addLogStatement(EbayHighlightDto highlightDto, String message) {
highlightDto.getPipelineMetadata()
.addLog(message);
}

/**
 * Reads the product variant from the run attached to the given highlight.
 *
 * @param highlightDto highlight whose run is inspected
 * @return the product variant reported by {@code highlightDto.getRun()}
 */
default ProductVariantEnum extractProductVariant(EbayHighlightDto highlightDto) {
  var run = highlightDto.getRun();
  return run.getProductVariant();
}

/**
 * Resolves the product group of the given highlight via its product variant.
 *
 * @param highlightDto highlight whose product variant is looked up
 * @return the group of the variant returned by {@code extractProductVariant}
 */
default GroupEnum extractProductGroup(EbayHighlightDto highlightDto) {
  ProductVariantEnum variant = extractProductVariant(highlightDto);
  return variant.getGroup();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
public class BasicFieldExtractor implements ItemSummaryDataExtractor {

@Override
public boolean isApplicableTo(ProductVariantEnum productVariant) {
public boolean isApplicableTo(EbayHighlightDto highlightDto) {
return true;
}

@Override
public void extractDataFromItemSummary(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
public void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
EbayFindingDto ebayFindingDto = highlightDto.getEbayFinding();
ebayFindingDto.setListingStatus(ACTIVE);
extractListingType(itemSummary, ebayFindingDto);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package io.irw.hawk.scraper.service.extractors;

import com.ebay.buy.browse.model.ItemSummary;
import io.irw.hawk.dto.merchandise.ProductVariantEnum;
import io.irw.hawk.dto.ebay.EbayHighlightDto;
import io.irw.hawk.scraper.model.ProcessingPipelineStep;

public interface ItemSummaryDataExtractor extends ProcessingPipelineStep {

boolean isApplicableTo(ProductVariantEnum productVariantEnum);
boolean isApplicableTo(EbayHighlightDto highlightDto);

void extractDataFromItemSummary(ItemSummary itemSummary, EbayHighlightDto highlightDto);
void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto);

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
public class PieceCountExtractor implements ItemSummaryDataExtractor {

@Override
public boolean isApplicableTo(ProductVariantEnum productVariant) {
return ! productVariant.getGroup().equals(GroupEnum.WHEELS);
public boolean isApplicableTo(EbayHighlightDto highlightDto) {
return ! extractProductGroup(highlightDto).equals(GroupEnum.WHEELS);
}

@Override
public void extractDataFromItemSummary(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
public void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
highlightDto.getEbayFinding().setNumberOfPieces(Optional.of(1));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ public List<Class<? extends ProcessingPipelineStep>> dependsOn() {
}

@Override
public boolean isApplicableTo(ProductVariantEnum productVariant) {
public boolean isApplicableTo(EbayHighlightDto highlightDto) {
return true;
}

@Override
public void extractDataFromItemSummary(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
public void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
EbayFindingDto ebayFindingDto = highlightDto.getEbayFinding();

if (ebayFindingDto.getBuyingOptions().contains(AUCTION)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
public class ShippingCostExtractor implements ItemSummaryDataExtractor {

@Override
public boolean isApplicableTo(ProductVariantEnum productVariant) {
public boolean isApplicableTo(EbayHighlightDto highlightDto) {
return true;
}

@Override
public void extractDataFromItemSummary(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
public void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
Optional<BigDecimal> minShippingCost = Optional.ofNullable(itemSummary.getShippingOptions())
.flatMap(shippingOptions -> shippingOptions.stream()
.filter(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package io.irw.hawk.scraper.service.openai;

import com.theokanning.openai.completion.CompletionRequest;
import com.theokanning.openai.completion.chat.ChatCompletionRequest;
import com.theokanning.openai.service.OpenAiService;
import io.irw.hawk.scraper.ai.LlmQuery;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

/**
 * Sends {@link LlmQuery} instances to the OpenAI chat-completion endpoint.
 *
 * <p>Both public entry points currently only log the returned choices; returning the
 * result to the caller is still to be done.
 */
@Service
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
@RequiredArgsConstructor
@Slf4j
public class LlmQueryService {

  OpenAiProperties openAiProperties;

  /**
   * Runs the query as a chat completion and logs every returned choice.
   *
   * @param llmQuery messages and model to send
   */
  public void chatCompletion(LlmQuery llmQuery) {
    executeChatCompletion(llmQuery);
  }

  /**
   * Runs the query as a chat completion and logs every returned choice.
   *
   * <p>NOTE(review): the name suggests this is meant to become a function-calling variant;
   * for now it issues the same request as {@link #chatCompletion(LlmQuery)}.
   *
   * @param llmQuery messages and model to send
   */
  public void function(LlmQuery llmQuery) {
    executeChatCompletion(llmQuery);
  }

  /** Builds the chat request from the query, executes it and logs the choices. */
  private void executeChatCompletion(LlmQuery llmQuery) {
    OpenAiService service = new OpenAiService(openAiProperties.getToken());
    try {
      // LlmQuery carries chat messages (it has no prompt), so the chat endpoint is the
      // one to use; the chat request builder has no `echo` option.
      ChatCompletionRequest completionRequest = ChatCompletionRequest.builder()
          .messages(llmQuery.getMessages())
          .model(llmQuery.getModel().getName())
          .build();

      // TODO: return the result
      service.createChatCompletion(completionRequest)
          .getChoices()
          .forEach(choice -> log.info("LLM completion choice: {}", choice));
    } finally {
      // A service instance is created per call; release its HTTP executor so threads
      // do not accumulate across calls.
      service.shutdownExecutor();
    }
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package io.irw.hawk.scraper.service.openai;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;
import lombok.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

@Component
@ConfigurationProperties(prefix = "openai")
@Value
public class OpenAiProperties {

private String token;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package io.irw.hawk.scraper.service.processors.skates.parts.extractors;

import static org.apache.commons.lang3.StringUtils.lowerCase;

import com.ebay.buy.browse.model.ItemSummary;
import com.theokanning.openai.completion.chat.ChatMessage;
import com.theokanning.openai.completion.chat.ChatMessageRole;
import io.irw.hawk.dto.ebay.EbayHighlightDto;
import io.irw.hawk.scraper.ai.LlmQuery;
import io.irw.hawk.scraper.model.ProcessingPipelineStep;
import io.irw.hawk.scraper.service.extractors.PriceExtractor;
import io.irw.hawk.scraper.service.openai.LlmQueryService;
import java.util.List;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

/**
 * Draft LLM-backed variant of {@link WheelCountExtractor}: sends the listing title and
 * description to the LLM together with a question about the number of wheels on sale.
 *
 * <p>The LLM call does not yet hand back a result, so this extractor does not write the
 * piece count to the finding.
 */
@RequiredArgsConstructor
@Service
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
@Slf4j
public class AiWheelCountExtractor extends WheelCountExtractor {

  /** System prompt instructing the model what to extract from the listing text. */
  public static final String HOW_MANY_WHEELS_ARE_THERE = """
      How many inline skate wheels are sold in this Ebay listing\s
      according to the listing description provided by the user?""";
  LlmQueryService llmQueryService;

  /** Declares {@link PriceExtractor} as a step this extractor is a dependency for. */
  @Override
  public List<Class<? extends ProcessingPipelineStep>> dependencyFor() {
    return List.of(PriceExtractor.class);
  }

  /**
   * Builds a system + user message pair from the finding's title and description and
   * submits it to the LLM.
   *
   * @param itemSummary raw eBay item summary (not read here)
   * @param highlightDto highlight whose finding supplies the title and description
   */
  @Override
  public void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
    String title = highlightDto.getEbayFinding().getTitle().toLowerCase();
    String shortDescription = lowerCase(highlightDto.getEbayFinding()
        .getItemDescription()
        .orElse(""));

    LlmQuery query = LlmQuery.builder()
        .messages(List.of(
            new ChatMessage(ChatMessageRole.SYSTEM.value(), HOW_MANY_WHEELS_ARE_THERE),
            new ChatMessage(ChatMessageRole.USER.value(), title + "\n" + shortDescription)))
        .build();
    llmQueryService.chatCompletion(query);

    // TODO: once chatCompletion returns the model's answer, parse the wheel count from it
    // and store it via highlightDto.getEbayFinding().setNumberOfPieces(...). The previous
    // draft assigned an undefined local here, which did not compile.
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,16 @@ public List<Class<? extends ProcessingPipelineStep>> dependencyFor() {
}

@Override
public boolean isApplicableTo(ProductVariantEnum productVariant) {
return productVariant.getGroup().equals(GroupEnum.WHEELS);
public boolean isApplicableTo(EbayHighlightDto highlightDto) {
return extractProductGroup(highlightDto).equals(GroupEnum.WHEELS);
}

@Override
public void extractDataFromItemSummary(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
String title = itemSummary.getTitle();
String shortDescription = itemSummary.getShortDescription();
public void extractDataFromItem(ItemSummary itemSummary, EbayHighlightDto highlightDto) {
String title = highlightDto.getEbayFinding().getTitle().toLowerCase();
String shortDescription = lowerCase(highlightDto.getEbayFinding()
.getItemDescription()
.orElse(""));

var numberOfWheels = extractNumberOfWheelsFromText(title, shortDescription);
//.or();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import io.irw.hawk.scraper.model.ProcessingPipelineStep;
import io.irw.hawk.scraper.service.matchers.BaselineItemDataMatcher;
import io.irw.hawk.scraper.service.matchers.ItemSummaryMatcher;
import io.irw.hawk.scraper.service.processors.skates.parts.extractors.AiWheelCountExtractor;
import io.irw.hawk.scraper.service.processors.skates.parts.extractors.WheelCountExtractor;
import java.util.List;
import lombok.AccessLevel;
Expand All @@ -24,7 +25,7 @@ public class WheelCountMatcher implements ItemSummaryMatcher {

@Override
public List<Class<? extends ProcessingPipelineStep>> dependsOn() {
return List.of(WheelCountExtractor.class);
return List.of(WheelCountExtractor.class, AiWheelCountExtractor.class);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ private static void applyDataExtractor(ProductVariantEnum targetProductVariant,
log.trace("Running extractor: {}", itemSummaryDataExtractor.getClass()
.getSimpleName());
if (itemSummaryDataExtractor.isApplicableTo(targetProductVariant)) {
itemSummaryDataExtractor.extractDataFromItemSummary(itemSummary, highlightDto);
itemSummaryDataExtractor.extractDataFromItem(itemSummary, highlightDto);
}
}

Expand Down

0 comments on commit b243fe5

Please sign in to comment.