Skip to content
This repository has been archived by the owner on Feb 1, 2023. It is now read-only.

Speed-ups (shasum + poetry install caching) #91

Merged
merged 8 commits into from
Mar 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions brick/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import shutil
import time
import subprocess

import arrow
import click
Expand All @@ -27,6 +26,7 @@
)
from .git import GIT_BRANCH
from .logger import logger, handler
from .shell import run_shell_command

docker_client = docker.from_env()

Expand All @@ -45,17 +45,15 @@
"node:12.13.1": "v6",
}


def run_shell_command(cmd: str, check=True):
    """Run ``cmd`` through the shell and return its stdout as text.

    Args:
        cmd: Shell command line; it is executed with ``shell=True``.
        check: When true (the default) a non-zero exit status raises
            ``subprocess.CalledProcessError``. When false, the captured
            stdout is returned regardless of the exit status.

    Returns:
        The command's stdout decoded as UTF-8, with trailing newlines removed.

    Raises:
        subprocess.CalledProcessError: if ``check`` is true and the command
            exits with a non-zero status.
    """
    # ``check_output`` always raises on failure, which silently ignored the
    # ``check`` argument; ``subprocess.run`` lets the caller's choice through.
    completed = subprocess.run(
        cmd,
        shell=True,
        encoding="utf8",
        stdout=subprocess.PIPE,
        check=check,
    )
    return completed.stdout.rstrip("\n")
# Target path of the `--mount=type=cache` run flag that is added for `poetry install` commands.
POETRY_CACHE_LOCATION = "/root/.cache/pypoetry"


def is_yarn_install_command(cmd):
    """Return True when ``cmd`` is a ``yarn`` / ``yarn install`` invocation.

    Only the text before the first ``--`` (i.e. before the first long flag) is
    inspected. That text is split into words: the first word must be exactly
    ``yarn`` and, if a second word is present, it must be ``install``. Anything
    after ``yarn install`` (for instance ``&& yarn build``) is tolerated.

    Other yarn subcommands such as ``yarn build`` and look-alike binaries such
    as ``yarnpkg`` are rejected, which a plain ``startswith("yarn")`` test
    would not do.

    Args:
        cmd: The shell command string to classify.

    Returns:
        bool: whether the command installs dependencies with yarn.
    """
    # Strip flags, then compare word by word so extra whitespace is harmless.
    words = cmd.split("--")[0].split()
    if not words or words[0] != "yarn":
        return False
    # A bare `yarn` is an install; otherwise the subcommand must be `install`.
    return len(words) == 1 or words[1] == "install"


def is_poetry_install_command(cmd):
    """Return True when ``cmd`` begins with ``poetry install``.

    Leading whitespace is ignored so that an indented command string is still
    recognised; everything after the ``poetry install`` prefix (flags, chained
    commands) is accepted as-is.

    Args:
        cmd: The shell command string to classify.

    Returns:
        bool: whether the command starts with ``poetry install``.
    """
    return cmd.lstrip().startswith("poetry install")


timings = []
Expand Down Expand Up @@ -151,6 +149,9 @@ def generate_run_command(cmd, run_flags):
location = f"{YARN_CACHE_LOCATION}/{cache_version}"
logger.debug(f"Using yarn cache located at {location}")
run_flags += [f"--mount=type=cache,target={location}"]
if is_poetry_install_command(cmd):
logger.debug(f"Using poetry cache located at {POETRY_CACHE_LOCATION}")
run_flags += [f"--mount=type=cache,target={POETRY_CACHE_LOCATION}"]
if (secrets or {}).items():
# Wrap the run command with a tar command
# to untar and cleanup after us
Expand Down
24 changes: 14 additions & 10 deletions brick/lib.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import glob
import os
import subprocess
from typing import List
import re
import time

import yaml
from braceexpand import braceexpand

from .logger import logger
from .shell import get_sha1_command, run_shell_command


# Discover root path
Expand Down Expand Up @@ -96,16 +97,19 @@ def compute_hash_from_paths(paths: List[str]) -> str:
"""
if not paths:
raise ValueError("Expected input paths")
stdout = subprocess.run(
f"find {' '.join(paths)} -type f -print0 | sort -z | xargs -0 shasum | shasum",
shell=True,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=ROOT_PATH,
).stdout
sha1_sum = stdout.decode("utf-8").split(" ")[0].strip()

t_start = time.time()
sha1_command = get_sha1_command()
cmd = f"find {' '.join(paths)} -type f -print0 | sort -z | xargs -0 {sha1_command} | {sha1_command}"
sha1_sum: str = run_shell_command(cmd=cmd, cwd=ROOT_PATH).split(" ")[0].strip()
assert len(sha1_sum) == 40, "expected sha1sum of length 40"

# Wasting more than a few seconds hashing usually means that the input dependencies
# should be tweaked.
hashing_time = time.time() - t_start
if hashing_time > 3:
logger.info(f'😴 Observed slow hashing ({hashing_time:.1f}s) for command "{cmd}"')

return sha1_sum


Expand Down
23 changes: 23 additions & 0 deletions brick/shell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import subprocess

from .logger import logger

# Memoised result of get_sha1_command(); stays None until the first lookup.
_sha1_command = None


def run_shell_command(cmd: str, cwd: str = None) -> str:
    """Execute ``cmd`` in a shell and hand back what it printed to stdout.

    Args:
        cmd: Shell command line; it is run with ``shell=True``.
        cwd: Working directory for the command, or ``None`` to use the
            current one.

    Returns:
        The command's stdout decoded as UTF-8, with trailing newlines removed.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        check=True,
        stdout=subprocess.PIPE,
        encoding="utf8",
        cwd=cwd,
    )
    output = completed.stdout
    return output.rstrip("\n")


def get_sha1_command() -> str:
    """Returns the fastest sha1sum command supported by the system.

    ``sha1sum`` is used when it can be found on ``PATH``; otherwise the
    function falls back to ``shasum`` and logs a notice. The answer is stored
    in the module-level ``_sha1_command`` so the lookup runs at most once per
    process.

    Returns:
        Either ``"sha1sum"`` or ``"shasum"``.
    """
    # pylint: disable=global-statement
    global _sha1_command
    if _sha1_command is None:
        # Function-scope import keeps the module's import block untouched.
        import shutil

        # shutil.which performs the PATH lookup in-process: no shell is
        # spawned, and a system without a `which` binary is not mistaken for
        # one without sha1sum.
        if shutil.which("sha1sum"):
            _sha1_command = "sha1sum"
        else:
            logger.info("sha1sum not found on this system (performance will be slightly degraded)")
            _sha1_command = "shasum"
    return _sha1_command