Skip to content

Commit

Permalink
Add max retries for downloader (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
yz3440 committed Jun 25, 2024
1 parent 8f786a5 commit 3779698
Showing 1 changed file with 42 additions and 19 deletions.
61 changes: 42 additions & 19 deletions streetview/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
async_client = httpx.AsyncClient()


DEFAULT_MAX_RETRIES = 6


@dataclass
class TileInfo:
x: int
Expand Down Expand Up @@ -45,34 +48,38 @@ def make_download_url(pano_id: str, zoom: int, x: int, y: int) -> str:
)


def fetch_panorama_tile(
    tile_info: TileInfo, max_retries: int = DEFAULT_MAX_RETRIES
) -> Image.Image:
    """
    Tries to download a tile, returns a PIL Image.

    The request is attempted at most ``max_retries`` times. After a failed
    attempt the function waits 2 seconds before trying again; no wait is
    performed after the final attempt.

    Args:
        tile_info: Tile descriptor; its ``fileurl`` attribute is the URL fetched.
        max_retries: Maximum number of download attempts. A value of zero or
            less performs no request and raises immediately.

    Returns:
        The downloaded tile opened with ``Image.open``.

    Raises:
        requests.ConnectionError: If every attempt failed with a connection
            error. The last underlying error is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            # Reading ``response.content`` stays inside the ``try``: the
            # request is made with ``stream=True``, so the body is fetched
            # when ``.content`` is accessed rather than by ``get`` itself.
            response = requests.get(tile_info.fileurl, stream=True)
            return Image.open(BytesIO(response.content))
        except requests.ConnectionError as exc:
            last_error = exc
            # Only wait when another attempt will actually follow.
            if attempt < max_retries:
                print("Connection error. Trying again in 2 seconds.")
                time.sleep(2)
    raise requests.ConnectionError("Max retries exceeded.") from last_error


async def fetch_panorama_tile_async(
    tile_info: TileInfo, max_retries: int = DEFAULT_MAX_RETRIES
) -> Image.Image:
    """
    Asynchronously tries to download a tile, returns a PIL Image.

    The request is attempted at most ``max_retries`` times using the
    module-level ``async_client``. After a failed attempt the coroutine
    sleeps 2 seconds before trying again; no sleep is performed after the
    final attempt.

    Args:
        tile_info: Tile descriptor; its ``fileurl`` attribute is the URL fetched.
        max_retries: Maximum number of download attempts. A value of zero or
            less performs no request and raises immediately.

    Returns:
        The downloaded tile opened with ``Image.open``.

    Raises:
        httpx.RequestError: If every attempt failed with a request error.
            The last underlying error is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            response = await async_client.get(tile_info.fileurl)
            return Image.open(BytesIO(response.content))
        except httpx.RequestError as e:
            last_error = e
            # Only wait when another attempt will actually follow.
            if attempt < max_retries:
                print(f"Request error {e}. Trying again in 2 seconds.")
                await asyncio.sleep(2)
    raise httpx.RequestError("Max retries exceeded.") from last_error


def iter_tile_info(pano_id: str, zoom: int) -> Generator[TileInfo, None, None]:
Expand All @@ -89,32 +96,39 @@ def iter_tile_info(pano_id: str, zoom: int) -> Generator[TileInfo, None, None]:


def iter_tiles(
    pano_id: str,
    zoom: int,
    max_retries: int = DEFAULT_MAX_RETRIES,
    multi_threaded: bool = False,
) -> Generator[Tile, None, None]:
    """
    Yields the tiles of a panorama, downloading each one on demand.

    Args:
        pano_id: Panorama identifier, forwarded to ``iter_tile_info``.
        zoom: Zoom level, forwarded to ``iter_tile_info``.
        max_retries: Maximum download attempts per tile, forwarded to
            ``fetch_panorama_tile``.
        multi_threaded: When false, tiles are fetched one at a time in the
            order produced by ``iter_tile_info``. When true, all downloads
            are submitted to a thread pool and tiles are yielded in
            completion order.

    Yields:
        A ``Tile`` carrying the tile's ``x``/``y`` position and its image.

    Raises:
        Exception: In multi-threaded mode, if a tile download fails; the
            original error is attached as ``__cause__``. In single-threaded
            mode, errors from ``fetch_panorama_tile`` propagate unchanged.
    """
    if not multi_threaded:
        for info in iter_tile_info(pano_id, zoom):
            image = fetch_panorama_tile(info, max_retries)
            yield Tile(x=info.x, y=info.y, image=image)
        return

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Map each future back to its tile so results can be positioned
        # even though they complete out of order.
        future_to_tile = {
            executor.submit(fetch_panorama_tile, info, max_retries): info
            for info in iter_tile_info(pano_id, zoom)
        }
        for future in concurrent.futures.as_completed(future_to_tile):
            info = future_to_tile[future]
            try:
                image = future.result()
            except Exception as exc:
                # Chain the worker's exception so its traceback is kept.
                raise Exception(
                    f"Failed to download tile {info.fileurl} due to Exception: {exc}"
                ) from exc
            else:
                yield Tile(x=info.x, y=info.y, image=image)


async def iter_tiles_async(
    pano_id: str, zoom: int, max_retries: int = DEFAULT_MAX_RETRIES
) -> AsyncGenerator[Tile, None]:
    """
    Asynchronously yields the tiles of a panorama.

    Each tile is awaited before the next one is requested, so tiles are
    yielded in the order produced by ``iter_tile_info``.

    Args:
        pano_id: Panorama identifier, forwarded to ``iter_tile_info``.
        zoom: Zoom level, forwarded to ``iter_tile_info``.
        max_retries: Maximum download attempts per tile, forwarded to
            ``fetch_panorama_tile_async``.

    Yields:
        A ``Tile`` carrying the tile's ``x``/``y`` position and its image.
    """
    for info in iter_tile_info(pano_id, zoom):
        image = await fetch_panorama_tile_async(info, max_retries)
        yield Tile(x=info.x, y=info.y, image=image)

Expand All @@ -123,26 +137,33 @@ def get_panorama(
pano_id: str,
zoom: int = 5,
multi_threaded: bool = False,
max_retries: int = DEFAULT_MAX_RETRIES,
) -> Image.Image:
"""
Downloads a streetview panorama.
Multi-threaded is a lot faster, but it's also a lot more likely to get you banned.
"""

tile_width = 512
tile_height = 512

total_width, total_height = get_width_and_height_from_zoom(zoom)
panorama = Image.new("RGB", (total_width * tile_width, total_height * tile_height))

for tile in iter_tiles(pano_id=pano_id, zoom=zoom, multi_threaded=multi_threaded):
for tile in iter_tiles(
pano_id=pano_id,
zoom=zoom,
multi_threaded=multi_threaded,
max_retries=max_retries,
):
panorama.paste(im=tile.image, box=(tile.x * tile_width, tile.y * tile_height))
del tile

return panorama


async def get_panorama_async(pano_id: str, zoom: int) -> Image.Image:
async def get_panorama_async(
pano_id: str, zoom: int, max_retries: int = DEFAULT_MAX_RETRIES
) -> Image.Image:
"""
Downloads a streetview panorama by iterating through the tiles asynchronously.
This runs in about the same speed as `get_panorama` with `multi_threaded=True`.
Expand All @@ -153,7 +174,9 @@ async def get_panorama_async(pano_id: str, zoom: int) -> Image.Image:
total_width, total_height = get_width_and_height_from_zoom(zoom)
panorama = Image.new("RGB", (total_width * tile_width, total_height * tile_height))

async for tile in iter_tiles_async(pano_id=pano_id, zoom=zoom):
async for tile in iter_tiles_async(
pano_id=pano_id, zoom=zoom, max_retries=max_retries
):
panorama.paste(im=tile.image, box=(tile.x * tile_width, tile.y * tile_height))
del tile

Expand Down

0 comments on commit 3779698

Please sign in to comment.