Skip to content
This repository has been archived by the owner on Feb 1, 2023. It is now read-only.

Commit

Permalink
Speed-ups (shasum + poetry install caching) (#91)
Browse files Browse the repository at this point in the history
* Add command to lazy get the sha1 command

* Remove duplicated function

* Simplify yarn command detection

* Speed up poetry install

Two timings when updating version number in libs/api:

Before poetry cache: 59/57 seconds
After  poetry cache: 31/30 seconds

* Use the fastest shasum available (30% speedup)

* Log error if hashing takes more than 3 seconds

* Disable pylint warning (we like globals)

* Update docstring
  • Loading branch information
skovhus authored Mar 3, 2021
1 parent 6549fe0 commit 6220835
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 19 deletions.
19 changes: 10 additions & 9 deletions brick/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import shutil
import time
import subprocess

import arrow
import click
Expand All @@ -27,6 +26,7 @@
)
from .git import GIT_BRANCH
from .logger import logger, handler
from .shell import run_shell_command

docker_client = docker.from_env()

Expand All @@ -45,17 +45,15 @@
"node:12.13.1": "v6",
}


def run_shell_command(cmd: str, check=True):
    """Run ``cmd`` through the shell and return its stdout as text.

    Args:
        cmd: Shell command line to execute (run with ``shell=True``).
        check: When true (the default), a non-zero exit status raises
            ``subprocess.CalledProcessError``. When false, the exit status
            is ignored and whatever was written to stdout is returned.

    Returns:
        The command's stdout decoded as UTF-8, with trailing newlines removed.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    # `check` used to be accepted but silently ignored; forward it so that
    # callers passing check=False actually get the non-raising behaviour.
    completed = subprocess.run(
        cmd,
        shell=True,
        encoding="utf8",
        stdout=subprocess.PIPE,
        check=check,
    )
    return completed.stdout.rstrip("\n")
# Target path of the `--mount=type=cache` run flag that generate_run_command
# adds for `poetry install` commands.
POETRY_CACHE_LOCATION = "/root/.cache/pypoetry"


def is_yarn_install_command(cmd):
    """Return True when ``cmd`` is a yarn install invocation.

    Both the bare ``yarn`` form and the explicit ``yarn install`` form count.
    Everything from the first ``--`` onwards is ignored, so flags such as
    ``--frozen-lockfile`` do not affect the result. Other yarn subcommands
    (for example ``yarn build``) are not treated as installs.

    Args:
        cmd: The command string to inspect.

    Returns:
        True if the flag-stripped, whitespace-trimmed command is exactly
        ``yarn`` or ``yarn install``; False otherwise.
    """
    install_commands = ("yarn", "yarn install")
    # Drop flags (everything from the first "--") and surrounding whitespace.
    base_command = cmd.partition("--")[0].strip()
    return base_command in install_commands


def is_poetry_install_command(cmd):
    """Tell whether ``cmd`` begins with ``poetry install``.

    The check is a plain prefix match: no whitespace is trimmed and any
    trailing flags or arguments are allowed.
    """
    install_prefix = "poetry install"
    return cmd.startswith(install_prefix)


timings = []
Expand Down Expand Up @@ -151,6 +149,9 @@ def generate_run_command(cmd, run_flags):
location = f"{YARN_CACHE_LOCATION}/{cache_version}"
logger.debug(f"Using yarn cache located at {location}")
run_flags += [f"--mount=type=cache,target={location}"]
if is_poetry_install_command(cmd):
logger.debug(f"Using poetry cache located at {POETRY_CACHE_LOCATION}")
run_flags += [f"--mount=type=cache,target={POETRY_CACHE_LOCATION}"]
if (secrets or {}).items():
# Wrap the run command with a tar command
# to untar and cleanup after us
Expand Down
24 changes: 14 additions & 10 deletions brick/lib.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import glob
import os
import subprocess
from typing import List
import re
import time

import yaml
from braceexpand import braceexpand

from .logger import logger
from .shell import get_sha1_command, run_shell_command


# Discover root path
Expand Down Expand Up @@ -96,16 +97,19 @@ def compute_hash_from_paths(paths: List[str]) -> str:
"""
if not paths:
raise ValueError("Expected input paths")
stdout = subprocess.run(
f"find {' '.join(paths)} -type f -print0 | sort -z | xargs -0 shasum | shasum",
shell=True,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=ROOT_PATH,
).stdout
sha1_sum = stdout.decode("utf-8").split(" ")[0].strip()

t_start = time.time()
sha1_command = get_sha1_command()
cmd = f"find {' '.join(paths)} -type f -print0 | sort -z | xargs -0 {sha1_command} | {sha1_command}"
sha1_sum: str = run_shell_command(cmd=cmd, cwd=ROOT_PATH).split(" ")[0].strip()
assert len(sha1_sum) == 40, "expected sha1sum of length 40"

# Wasting more than a few seconds hashing usually means that the input dependencies
# should be tweaked.
hashing_time = time.time() - t_start
if hashing_time > 3:
logger.info(f'😴 Observed slow hashing ({hashing_time:.1f}s) for command "{cmd}"')

return sha1_sum


Expand Down
23 changes: 23 additions & 0 deletions brick/shell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import subprocess

from .logger import logger

_sha1_command = None


def run_shell_command(cmd: str, cwd: str = None) -> str:
    """Execute ``cmd`` in a shell and return what it wrote to stdout.

    Args:
        cmd: Shell command line to execute (run with ``shell=True``).
        cwd: Working directory for the command; ``None`` leaves it unchanged.

    Returns:
        The command's stdout decoded as UTF-8, with trailing newlines removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        check=True,
        stdout=subprocess.PIPE,
        encoding="utf8",
        cwd=cwd,
    )
    output = completed.stdout
    return output.rstrip("\n")


def get_sha1_command() -> str:
    """Return the fastest sha1sum command supported by the system.

    ``sha1sum`` is preferred; ``shasum`` is used as the fallback when
    ``sha1sum`` cannot be found on the PATH. The answer is stored in the
    module-level ``_sha1_command`` so the lookup runs at most once per process.

    Returns:
        Either ``"sha1sum"`` or ``"shasum"``.
    """
    # pylint: disable=global-statement
    global _sha1_command

    # Local import keeps this block self-contained; shutil is stdlib.
    import shutil

    if not _sha1_command:
        # Look the binary up in-process instead of shelling out to `which`:
        # if `which` itself is missing, the shell exits non-zero and the old
        # code would wrongly conclude that sha1sum is unavailable.
        if shutil.which("sha1sum") is not None:
            _sha1_command = "sha1sum"
        else:
            logger.info("sha1sum not found on this system (performance will be slightly degraded)")
            _sha1_command = "shasum"
    return _sha1_command

0 comments on commit 6220835

Please sign in to comment.