Add web scraper module and db model
arka8038 committed Jun 22, 2024
1 parent dec20d1 commit df0e091
Showing 10 changed files with 932 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .env
@@ -0,0 +1,4 @@
BRIGHT_DATA_USERNAME=brd-customer-hl_8051aa5d-zone-pricewise
BRIGHT_DATA_PASSWORD=aojb07aq7spf

MONGODB_URI=mongodb+srv://arka8038:gubbU%[email protected]/?retryWrites=true&w=majority&appName=Cluster0
5 changes: 3 additions & 2 deletions components/SearchBar.tsx
@@ -1,5 +1,6 @@
'use client'

+import { scrapeAndStoreProduct } from '@/lib/actions'
import { FormEvent, useState } from 'react'

const isValidAmazonProductUrl = (url: string) => {
@@ -24,7 +25,7 @@ const SearchBar = () => {
  const [searchPrompt, setSearchPrompt] = useState('')
  const [isLoading, setIsLoading] = useState(false)

-  const handleSubmit = (event: FormEvent<HTMLFormElement>) => {
+  const handleSubmit = async (event: FormEvent<HTMLFormElement>) => {
    event.preventDefault()

    const isValidLink = isValidAmazonProductUrl(searchPrompt)
@@ -34,7 +35,7 @@
    try {
      setIsLoading(true)

-      //scrape the product page
+      const product = await scrapeAndStoreProduct(searchPrompt)
    } catch (error) {
      console.error(error)
    } finally {
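The body of isValidAmazonProductUrl is collapsed in this view. A minimal sketch of what such a validator could look like (hypothetical, using the standard URL constructor; the committed implementation may differ):

// Hypothetical sketch of the collapsed validator above — not the committed code
const isValidAmazonProductUrl = (url: string) => {
  try {
    const parsedUrl = new URL(url)
    const hostname = parsedUrl.hostname

    // Accept amazon.com, amazon.co.uk, amazon.in, and similar hostnames
    return (
      hostname.includes('amazon.com') ||
      hostname.includes('amazon.') ||
      hostname.endsWith('amazon')
    )
  } catch (error) {
    // new URL() throws on malformed input
    return false
  }
}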
115 changes: 115 additions & 0 deletions lib/actions/index.ts
@@ -0,0 +1,115 @@
'use server'

import { revalidatePath } from 'next/cache'
import Product from '../models/product.model'
import { scrapeAmazonProduct } from '../scraper'
import { getAveragePrice, getHighestPrice, getLowestPrice } from '../utils'
import { User } from '@/types'
import { connectToDb } from '../mongoose'

export async function scrapeAndStoreProduct(productUrl: string) {
  if (!productUrl) return

  try {
    await connectToDb()

    const scrapedProduct = await scrapeAmazonProduct(productUrl)

    if (!scrapedProduct) return

    let product = scrapedProduct

    const existingProduct = await Product.findOne({ url: scrapedProduct.url })

    if (existingProduct) {
      const updatedPriceHistory: any = [
        ...existingProduct.priceHistory,
        { price: scrapedProduct.currentPrice },
      ]

      product = {
        ...scrapedProduct,
        priceHistory: updatedPriceHistory,
        lowestPrice: getLowestPrice(updatedPriceHistory),
        highestPrice: getHighestPrice(updatedPriceHistory),
        averagePrice: getAveragePrice(updatedPriceHistory),
      }
    }

    const newProduct = await Product.findOneAndUpdate(
      { url: scrapedProduct.url },
      product,
      { upsert: true, new: true }
    )

    revalidatePath(`/products/${newProduct._id}`)
  } catch (error: any) {
    throw new Error(`Failed to create/update product: ${error.message}`)
  }
}

export async function getProductById(productId: string) {
  try {
    await connectToDb()

    const product = await Product.findOne({ _id: productId })

    if (!product) return null

    return product
  } catch (error) {
    console.log(error)
  }
}

export async function getAllProducts() {
  try {
    await connectToDb()

    const products = await Product.find()

    return products
  } catch (error) {
    console.log(error)
  }
}

export async function getSimilarProducts(productId: string) {
  try {
    await connectToDb()

    const currentProduct = await Product.findById(productId)

    if (!currentProduct) return null

    const similarProducts = await Product.find({
      _id: { $ne: productId },
    }).limit(3)

    return similarProducts
  } catch (error) {
    console.log(error)
  }
}

// export async function addUserEmailToProduct(productId: string, userEmail: string) {
//   try {
//     const product = await Product.findById(productId);

//     if (!product) return;

//     const userExists = product.users.some((user: User) => user.email === userEmail);

//     if (!userExists) {
//       product.users.push({ email: userEmail });

//       await product.save();

//       const emailContent = await generateEmailBody(product, "WELCOME");

//       await sendEmail(emailContent, [userEmail]);
//     }
//   } catch (error) {
//     console.log(error);
//   }
// }
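getLowestPrice, getHighestPrice, and getAveragePrice are imported from '../utils' above but are not part of this commit. A minimal sketch of how such helpers might look, assuming each price-history entry carries a numeric price field:

// Hypothetical sketch of the lib/utils.ts price helpers — not included in this commit
interface PriceHistoryItem {
  price: number
}

export function getLowestPrice(priceList: PriceHistoryItem[]) {
  // Smallest recorded price; 0 for an empty history
  return priceList.reduce(
    (min, item) => (item.price < min ? item.price : min),
    priceList[0]?.price ?? 0
  )
}

export function getHighestPrice(priceList: PriceHistoryItem[]) {
  // Largest recorded price; 0 for an empty history
  return priceList.reduce(
    (max, item) => (item.price > max ? item.price : max),
    priceList[0]?.price ?? 0
  )
}

export function getAveragePrice(priceList: PriceHistoryItem[]) {
  if (priceList.length === 0) return 0
  const total = priceList.reduce((sum, item) => sum + item.price, 0)
  return total / priceList.length
}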
34 changes: 34 additions & 0 deletions lib/models/product.model.ts
@@ -0,0 +1,34 @@
import mongoose from 'mongoose'

const productSchema = new mongoose.Schema(
  {
    url: { type: String, required: true, unique: true },
    currency: { type: String, required: true },
    image: { type: String, required: true },
    title: { type: String, required: true },
    currentPrice: { type: Number, required: true },
    originalPrice: { type: Number, required: true },
    priceHistory: [
      {
        price: { type: Number, required: true },
        date: { type: Date, default: Date.now },
      },
    ],
    lowestPrice: { type: Number },
    highestPrice: { type: Number },
    averagePrice: { type: Number },
    discountRate: { type: Number },
    description: { type: String },
    category: { type: String },
    reviewsCount: { type: Number },
    isOutOfStock: { type: Boolean, default: false },
    users: {
      type: [{ email: { type: String, required: true } }],
      default: [],
    },
  },
  { timestamps: true }
)

const Product =
mongoose.models.Product || mongoose.model('Product', productSchema)

export default Product
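For reference, the shapes this schema implies (a hypothetical sketch mirroring the fields above — the actual definitions in @/types are not part of this commit):

// Hypothetical sketch of @/types — derived from the schema fields above
export interface PriceHistoryItem {
  price: number
  date?: Date
}

export interface User {
  email: string
}

export interface Product {
  _id?: string
  url: string
  currency: string
  image: string
  title: string
  currentPrice: number
  originalPrice: number
  priceHistory: PriceHistoryItem[]
  lowestPrice?: number
  highestPrice?: number
  averagePrice?: number
  discountRate?: number
  description?: string
  category?: string
  reviewsCount?: number
  isOutOfStock?: boolean
  users?: User[]
}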
19 changes: 19 additions & 0 deletions lib/mongoose.ts
@@ -0,0 +1,19 @@
import mongoose from 'mongoose'

let isConnected = false

export const connectToDb = async () => {
  mongoose.set('strictQuery', true)

  if (!process.env.MONGODB_URI) return console.log('No MongoDB URI')

  if (isConnected) return console.log('Using current database connection')

  try {
    await mongoose.connect(process.env.MONGODB_URI)
    isConnected = true
    console.log('Connected to MongoDB')
  } catch (error) {
    console.log(error)
  }
}
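The module-level isConnected flag works for a single long-lived process; under Next.js hot reloads or serverless invocations the module can be re-evaluated and reconnect on each request. A common alternative (a sketch of a different pattern, not what this commit uses) caches the connection promise on globalThis:

// Alternative sketch: cache the mongoose connection promise globally
import mongoose from 'mongoose'

declare global {
  // eslint-disable-next-line no-var
  var _mongooseConn: Promise<typeof mongoose> | undefined
}

export const connectToDb = async () => {
  if (!process.env.MONGODB_URI) throw new Error('MONGODB_URI is not set')

  // Reuse a single pending or established connection across reloads
  if (!globalThis._mongooseConn) {
    globalThis._mongooseConn = mongoose.connect(process.env.MONGODB_URI)
  }

  return globalThis._mongooseConn
}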
85 changes: 85 additions & 0 deletions lib/scraper/index.ts
@@ -0,0 +1,85 @@
import axios from 'axios'
import * as cheerio from 'cheerio'
import { extractCurrency, extractDescription, extractPrice } from '../utils'

export async function scrapeAmazonProduct(url: string) {
  if (!url) return

  // BrightData proxy configuration
  const username = String(process.env.BRIGHT_DATA_USERNAME)
  const password = String(process.env.BRIGHT_DATA_PASSWORD)
  const port = 22225
  const session_id = (1000000 * Math.random()) | 0

  const options = {
    auth: {
      username: `${username}-session-${session_id}`,
      password,
    },
    host: 'brd.superproxy.io',
    port,
    rejectUnauthorized: false,
  }

  try {
    // Fetch the product page
    const response = await axios.get(url, options)
    const $ = cheerio.load(response.data)

    // Extract the product title
    const title = $('#productTitle').text().trim()
    const currentPrice = extractPrice(
      $('.priceToPay span.a-price-whole'),
      $('.a.size.base.a-color-price'),
      $('.a-button-selected .a-color-base')
    )

    const originalPrice = extractPrice(
      $('#priceblock_ourprice'),
      $('.a-price.a-text-price span.a-offscreen'),
      $('#listPrice'),
      $('#priceblock_dealprice'),
      $('.a-size-base.a-color-price')
    )

    const outOfStock =
      $('#availability span').text().trim().toLowerCase() ===
      'currently unavailable'

    const images =
      $('#imgBlkFront').attr('data-a-dynamic-image') ||
      $('#landingImage').attr('data-a-dynamic-image') ||
      '{}'

    const imageUrls = Object.keys(JSON.parse(images))

    const currency = extractCurrency($('.a-price-symbol'))
    const discountRate = $('.savingsPercentage').text().replace(/[-%]/g, '')

    const description = extractDescription($)

    // Construct data object with scraped information
    const data = {
      url,
      currency: currency || '$',
      image: imageUrls[0],
      title,
      currentPrice: Number(currentPrice) || Number(originalPrice),
      originalPrice: Number(originalPrice) || Number(currentPrice),
      priceHistory: [],
      discountRate: Number(discountRate),
      category: 'category',
      reviewsCount: 100,
      stars: 4.5,
      isOutOfStock: outOfStock,
      description,
      lowestPrice: Number(currentPrice) || Number(originalPrice),
      highestPrice: Number(originalPrice) || Number(currentPrice),
      averagePrice: Number(currentPrice) || Number(originalPrice),
    }

    return data
  } catch (error: any) {
    console.log(error)
  }
}
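extractPrice, extractCurrency, and extractDescription are imported from '../utils' but not added in this commit. A minimal sketch of what they might do, assuming cheerio selections are passed in (names and selectors here are illustrative):

// Hypothetical sketch of the lib/utils.ts extraction helpers — not part of this commit
// Return the first non-empty price among the candidate cheerio selections
export function extractPrice(...elements: any[]) {
  for (const element of elements) {
    const priceText = element.text().trim()
    if (priceText) {
      // Keep digits and the decimal point, e.g. "$1,299.00" -> "1299.00"
      return priceText.replace(/[^\d.]/g, '')
    }
  }
  return ''
}

// Currency symbol from the first matching element, e.g. "$" or "₹"
export function extractCurrency(element: any) {
  const currencyText = element.text().trim().slice(0, 1)
  return currencyText || ''
}

// Join feature-bullet / description blocks into one text blob
export function extractDescription($: any) {
  const selectors = ['.a-unordered-list .a-list-item', '.a-expander-content p']
  for (const selector of selectors) {
    const elements = $(selector)
    if (elements.length > 0) {
      return elements
        .map((_: number, el: any) => $(el).text().trim())
        .get()
        .join('\n')
    }
  }
  return ''
}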