-
Notifications
You must be signed in to change notification settings - Fork 0
/
FlipkartScraper.py
118 lines (92 loc) · 2.65 KB
/
FlipkartScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from bs4 import *
from pyrsistent import b
import requests
import os
import sys
sys.path.insert(1,'/path/to/src')
import test
# CREATE FOLDER
def folder_create(images):
    """Ensure the output folder exists, then download ``images`` into it.

    Args:
        images: The ``<img>`` tags to download; passed through unchanged to
            ``download_images``.

    Raises:
        OSError: If the folder cannot be created (for example, a regular
            file with the same name already exists, or permission is denied).
    """
    folder_name = 'extacts'
    # exist_ok=True lets a re-run reuse the folder. The previous recovery
    # path called folder_create() without its required argument and crashed
    # with a TypeError whenever the folder already existed.
    os.makedirs(folder_name, exist_ok=True)
    # image downloading start
    download_images(images, folder_name)
# DOWNLOAD ALL IMAGES FROM THAT URL
def _pick_image_url(image):
    """Return the source URL of one image tag, or None if it has none."""
    # Attributes are tried in priority order: lazy-loading variants first,
    # the plain "src" last.
    for attribute in ("data-srcset", "data-src", "data-fallback-src", "src"):
        value = image.get(attribute)
        if not value:
            continue
        tokens = value.split()
        if not tokens:
            # whitespace-only attribute: try the next one
            continue
        if attribute == "data-srcset":
            # A srcset lists "url descriptor, url descriptor, ..."; only the
            # first URL is requested, not the whole list.
            return tokens[0].rstrip(",")
        return value.strip()
    return None


def download_images(images, folder_name, timeout=10.0):
    """Download every image in ``images`` into ``folder_name``.

    Each tag's URL is taken from the first non-empty attribute among
    ``data-srcset``, ``data-src``, ``data-fallback-src`` and ``src``. A
    response whose body decodes as UTF-8 text is treated as "not an image"
    and skipped. Files are written as ``images<N>.jpg`` where ``N`` is the
    1-based position of the tag in ``images``.

    Failures are best-effort: a tag without a URL, a failed request or a
    failed write skips that image and the loop continues.

    Args:
        images: Sized collection of tag-like objects supporting
            ``.get(attribute)`` (e.g. BeautifulSoup tags or dicts).
        folder_name: Existing directory that receives the files.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The number of images written to disk.
    """
    total = len(images)
    # print total images found in URL
    print(f"Total {total} Image Found!")
    if not total:
        return 0

    count = 0
    for position, image in enumerate(images, start=1):
        image_link = _pick_image_url(image)
        if image_link is None:
            # No source attribute: skip instead of reusing the previous
            # iteration's URL (which downloaded the same image twice).
            continue

        try:
            response = requests.get(image_link, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as error:
            print(f"Skipping image {position}: {error}")
            continue

        payload = response.content
        try:
            payload.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid text, so treat it as binary image data and save it.
            pass
        else:
            # Body is plain text (HTML error page, empty body, ...): not an
            # image, so nothing is written.
            continue

        target = os.path.join(folder_name, f"images{position}.jpg")
        try:
            with open(target, "wb") as handle:
                handle.write(payload)
        except OSError as error:
            print(f"Skipping image {position}: {error}")
            continue
        # counting number of image downloaded
        count += 1

    # Report whether every image made it to disk
    if count == total:
        print("All Images Downloaded!")
    else:
        print(f"Total {count} Images Downloaded Out of {total}")
    return count
# MAIN FUNCTION START
def main(url, timeout=10.0):
    """Fetch ``url``, collect its ``<img>`` tags and download them.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the page request; without a timeout
            the request could block indefinitely on an unresponsive server.

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    # content of URL
    response = requests.get(url, timeout=timeout)
    # Parse HTML Code
    soup = BeautifulSoup(response.text, 'html.parser')
    # find all images in URL (find_all is the current name; findAll is a
    # deprecated alias)
    images = soup.find_all('img')
    # Create the output folder and start the downloads
    folder_create(images)
# Page to scrape: a Flipkart search-results URL (the query is hard-coded)
url = 'https://www.flipkart.com/search?q=Men+Apparel+Topwear+Tops+Black+Fall+Casual'
# Script entry point: runs on execution and also on import, because there
# is no __main__ guard around this call.
main(url)