From 752014925d055387ff4590a9862fb382350b0e5d Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Thu, 2 Mar 2023 23:53:47 -0800 Subject: [PATCH 01/15] Make generated file warnings always encourage --keep_going --- refresh.template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 6147d8c..76431c5 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -593,8 +593,8 @@ def _get_files(compile_action): log_warning(f""">>> A source file you compile doesn't (yet) exist: {source_file} It's probably a generated file, and you haven't yet run a build to generate it. That's OK; your code doesn't even have to compile for this tool to work. - If you can, though, you might want to run a build of your code. - That way everything is generated, browsable and indexed for autocomplete. + If you can, though, you might want to run a build of your code with --keep_going. + That way everything possible is generated, browsable and indexed for autocomplete. However, if you have *already* built your code, and generated the missing file... Please make sure you're supplying this tool with the same flags you use to build. You can either use a refresh_compile_commands rule or the special -- syntax. Please see the README. From 07c307ab34d458cf0a4187a15ce1f6a2b72c408c Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 6 Mar 2023 22:56:02 -0800 Subject: [PATCH 02/15] Switch to git rev-parse --git-common-dir See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/103 --- refresh.template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 76431c5..2eec2fe 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -973,8 +973,8 @@ def _ensure_external_workspaces_link_exists(): def _ensure_gitignore_entries_exist(): """Ensure `//compile_commands.json`, `//external`, and other useful entries are `.gitignore`'d if in a git repo.""" - # Silently check if we're (nested) within a git repository. It isn't sufficient to check for the presence of a `.git` directory, in case, e.g., the bazel workspace is nested inside the git repository or you're off in git worktree. - git_dir_process = subprocess.run('git rev-parse --git-dir', + # Silently check if we're (nested) within a git repository. It isn't sufficient to check for the presence of a `.git` directory, in case, e.g., the bazel workspace is nested inside the git repository or you're off in a git worktree. + git_dir_process = subprocess.run('git rev-parse --git-common-dir', # common-dir because despite current gitignore docs, there's just one info/exclude in the common git dir, not one in each of the worktree's git dirs. shell=True, # Ensure this will still fail with a nonzero error code even if `git` isn't installed, unifying error cases. stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, encoding=locale.getpreferredencoding(), From f02c9a82e4ea166584a7e2e58d566872121fba7c Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 14 Mar 2023 01:17:22 -0700 Subject: [PATCH 03/15] Add note about possibility of inferring MSVC's localized inclusion note string --- refresh.template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/refresh.template.py b/refresh.template.py index 2eec2fe..3cb871f 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -386,6 +386,7 @@ def _get_headers_msvc(compile_args: typing.List[str], source_path: str): # Based on the locale, `cl.exe` will emit different marker strings. See also https://github.com/ninja-build/ninja/issues/613#issuecomment-885185024 and https://github.com/bazelbuild/bazel/pull/7966. # We can't just set environment['VSLANG'] = "1033" (English) and be done with it, because we can't assume the user has the English language pack installed. + # Note that, if we're ever having problems with MSVC changing these strings too often, we can instead infer them by compiling some test files and passing /nologo. See https://github.com/ninja-build/ninja/issues/613#issuecomment-1465084387 include_marker = ( 'Note: including file:', # English - United States '注意: 包含文件: ', # Chinese - People's Republic of China From ff78a9cb12dff3c4d362e72ea4e43b80eac8b53b Mon Sep 17 00:00:00 2001 From: Zhanyong Wan Date: Fri, 17 Mar 2023 02:48:53 +0000 Subject: [PATCH 04/15] Make refresh.template.py compatible with python3.6. --- refresh.template.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 3cb871f..5520af5 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -11,8 +11,9 @@ """ import sys -if sys.version_info < (3,7): - sys.exit("\n\033[31mFATAL ERROR:\033[0m Python 3.7 or later is required. Please update!") +if sys.version_info < (3,6): + sys.exit("\n\033[31mFATAL ERROR:\033[0m Python 3.6 or later is required. Please update!") + # 3.6 backwards compatibility required by @zhanyong-wan in https://github.com/hedronvision/bazel-compile-commands-extractor/issues/111. # 3.7 backwards compatibility required by @lummax in https://github.com/hedronvision/bazel-compile-commands-extractor/pull/27. Try to contact him before upgrading. # When adding things could be cleaner if we had a higher minimum version, please add a comment with MIN_PY=3.. # Similarly, when upgrading, please search for that MIN_PY= tag. @@ -99,7 +100,10 @@ def _get_bazel_cached_action_keys(): """Gets the set of actionKeys cached in bazel-out.""" action_cache_process = subprocess.run( ['bazel', 'dump', '--action_cache'], - capture_output=True, + # The capture_output argument was introduced in python3.7, so we use + # stdout and stderr for python3.6 compatibility. + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), check=True, # Should always succeed. ) @@ -226,7 +230,10 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke header_search_process = _subprocess_run_spilling_over_to_param_file_if_needed( # Note: gcc/clang can be run from Windows, too. header_cmd, - capture_output=True, + # The capture_output argument was introduced in python3.7, so we use + # stdout and stderr for python3.6 compatibility. + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), check=False, # We explicitly ignore errors and carry on. ) @@ -234,7 +241,7 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke # Tolerate failure gracefully--during editing the code may not compile! if header_search_process.stderr: _print_header_finding_warning_once() - print(header_search_process.stderr, file=sys.stderr, end='') # Captured with capture_output and dumped explicitly to avoid interlaced output. + print(header_search_process.stderr, file=sys.stderr, end='') # Captured with stdout/stderr and dumped explicitly to avoid interlaced output. if not header_search_process.stdout: # Worst case, we couldn't get the headers, return set() @@ -882,7 +889,10 @@ def _get_commands(target: str, flags: str): aquery_process = subprocess.run( aquery_args, - capture_output=True, + # The capture_output argument was introduced in python3.7, so we use + # stdout and stderr for python3.6 compatibility. + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), check=False, # We explicitly ignore errors from `bazel aquery` and carry on. ) From 28b61e6fae997ba5d6eaf2b5ddbf82b66f96fece Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Thu, 16 Mar 2023 20:42:00 -0700 Subject: [PATCH 05/15] Update listening strategy notes --- ImplementationReadme.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ImplementationReadme.md b/ImplementationReadme.md index 65c10bc..e2910e6 100644 --- a/ImplementationReadme.md +++ b/ImplementationReadme.md @@ -138,16 +138,16 @@ It looks like long ago—and perhaps still inside Google—Bazel created such an We're using bazel's [`aquery` ("action query")](https://docs.bazel.build/versions/master/aquery.html) subcommand to dump out all of the compiler invocations Bazel intends to use as part of a build. The key important features of `aquery` that make it the right choice are (1) that it operates over compile actions, so it directly gives us access to the configured compile invocations we need for `compile_commands.json` and (2) that it's comparatively really fast because it doesn't do a full build. -Previously, we'd built things using [`action_listeners`](https://docs.bazel.build/versions/master/be/extra-actions.html). (The old, original implementation lives in our git history). Action listeners are good in that, like `aquery`, they directly listen at the level of compile actions. However, they can only be invoked at the same time as a full build. This makes the first generation *really* slow—like 30m instead of aquery's 30s. Rebuilds that should hit a warm cache are also slow (~10m) if you have multiple targets, which has to be an [unnecessary cache miss bug in Bazel](https://github.com/bazelbuild/bazel/issues/13029). You might be able to get around the 30m cold issue by suppressing build with [`--nobuild`](https://docs.bazel.build/versions/master/command-line-reference.html#flag--build), though we haven't tried it and it might disrupt the output protos or otherwise not work. (Only haven't tried because we switched implementations before finding the flag.) Another key downside compared to `aquery` is that `action_listener` output accumulates in the build tree, and there's no no way to tell which outputs are fresh. There's always the risk that stale output could bork your `compile_commands.json`, so you need to widen the user interface to include a cleaning step. An additional issue is future support: action listeners are marked as experimental in their docs and there are [occasional, stale references to deprecating them](https://github.com/bazelbuild/bazel/issues/4824). The main (but hopeful temporary) benefit of action listeners is that they auto-identify headers used by a given source file, which makes working around the [header commands inference issue](https://github.com/clangd/clangd/issues/123) easier. Another smaller benefit is that being integrated with building guarantees generated files have been generated. So while `action_listeners` can get the job done, they're slow and the abstraction is leaky and questionably supported. +Previously, we'd built things using [`action_listeners`](https://docs.bazel.build/versions/master/be/extra-actions.html). (The old, original implementation lives in our git history). Action listeners are good in that, like `aquery`, they directly listen at the level of compile actions. However, they can only be invoked at the same time as a full build. This makes the first generation *really* slow—like 30m instead of aquery's 30s. Rebuilds that should hit a warm cache are also slow (~10m) if you have multiple targets, which has to be an [unnecessary cache miss bug in Bazel](https://github.com/bazelbuild/bazel/issues/13029). You might be able to get around the 30m cold issue by suppressing build with [`--nobuild`](https://docs.bazel.build/versions/master/command-line-reference.html#flag--build), though we haven't tried it and it might disrupt the output protos or otherwise not work. (Only haven't tried because we switched implementations before finding the flag.) Another key downside compared to `aquery` is that `action_listener` output accumulates in the build tree, and there's no no way to tell which outputs are fresh. There's always the risk that stale output could bork your `compile_commands.json`, so you need to widen the user interface to include a cleaning step. An additional issue is future support: action listeners are marked as deprecated in their docs, even if they are still used (at the time of writing) by Google's Kythe tooling. The main (but hopeful temporary) benefit of action listeners is that they auto-identify headers used by a given source file, which makes working around the [header commands inference issue](https://github.com/clangd/clangd/issues/123) easier. Another smaller benefit is that being integrated with building guarantees generated files have been generated. So while `action_listeners` can get the job done, they're slow and the abstraction is leaky and questionably supported. -What you really don't want to do is use `bazel query`, `bazel cquery`, or `bazel aspect` if you want compile commands. The primary problem with each is that they crawl the graph of bazel targets (think graph nodes), rather than actually listening to the commands Bazel is going to invoke during its compile actions. We're trying to output compile commands, not graph nodes, and it's not a 1:1 relationship! The perils here are surfaced by Bazel platform transitions. In Bazel, rules often configure their dependencies through a mechanism known as transitions. Any of these node-at-a-time strategies will miss those dependency configurations, since they'll view the nodes as independent and won't know they're being compiled (possibly multiple times!) for different platforms. Both `aspects` and `query` have issues with `select()` statements, unlike `cquery`, which takes configuration into account (hence "configured query"->cquery). But aspects are particularly problematic because they only propagate along a named attribute (like `"deps"`), breaking the chain at things like genrules, which name things differently (`"srcs"` not `"deps"` for genrules). But `query`, `cquery`, and `aspect` share the key fundamental flaw of operating on nodes not edges. +What you really don't want to do is use `bazel query`, `bazel cquery`, or `bazel aspect` if you want compile commands. The primary problem with each is that they crawl the graph of bazel targets (think graph nodes), rather than actually listening to the commands Bazel is going to invoke during its compile actions. We're trying to output compile commands, not graph nodes, and it's not a 1:1 relationship! Rather than getting the commands directly with aquery, you'd have to duplicate the rule attribute -> command line logic from bazel & the toolchain, which is tough with custom toolchains, platform-specific logic, and in-graph configuration transitions for cross-compiling. Note that `query` has issues with `select()` statements, unlike `cquery`, which takes configuration into account (hence "configured query"->cquery). Aspects are also problematic because they only propagate along attributes (like `"deps"`), breaking the chain at things like genrules, which name things differently (`"srcs"` not `"deps"` for genrules) vs. easily sorting to the right mnemonic compile actions with aquery even for actions that are for code-gen tools being built for the host. But most importantly, `query`, `cquery`, and `aspect` share the key fundamental flaw of operating on graph nodes not action-graph edges. ### References Used When Building There were a handful of existing implementations for getting compile commands from Bazel. None came particularly close to working well across platforms we were trying, falling into the gotchas listed in this doc, but we really appreciate the effort that went into them and the learnings we gleaned. Here they are, categorized by type and problem, sorted by closest to working. -The closest—and only aquery approach: https://github.com/thoren-d/compdb-bazel. It's Windows-clang-specific, but could be a good resource if we're ever trying to get things to work on Windows. We didn't add to it because it looked stale, had a somewhat inelegant approach, and wouldn't have had much reusable code. +The closest—and only aquery approach: https://github.com/thoren-d/compdb-bazel. It's Windows-clang-cl-specific, but could be a good resource when working on things for Windows. We didn't add to it because it looked stale, had a somewhat inelegant approach, and wouldn't have had much reusable code for the cross-platform focus of this tool. Action-listener-based approaches (see pros and cons above). These helped bootstrap our initial, action-listener-based implementation. @@ -162,11 +162,11 @@ Action-listener-based approaches (see pros and cons above). These helped bootstr - https://github.com/kythe/kythe/blob/master/kythe/cxx/extractor/README.md - Another C++ impl here, but with similar issues https://github.com/tolikzinovyev/bazel-compilation-db -Aspect-based approaches. They're the most commonly used despite the approach having real problems (see above). +Aspect-based approaches. They were the most commonly used despite the approach having real problems (see above). - https://github.com/grailbio/bazel-compilation-database is probably the most popular for generating Bazel compile_commands.json. But it's really far from working for us. No unwrapping, no ability to pick up platform flags, all the aspect issues, etc. - Bazel's official editor plugins. Note: editor plugins, *not* compile_commands.json generators. - Bazel's IntelliJ adapter. Problems? All the usual subjects for aspects. - - [Tulsi](https://github.com/bazelbuild/tulsi). Smarter about working around some of the issues, which they can do by reimplementing some of the Apple specific logic, rather than listening for it. See XCodeAdapter/ImplementationReadme.md. + - [Tulsi](https://github.com/bazelbuild/tulsi). Smarter about working around some of the issues, which they can do by reimplementing some of the Apple specific logic, rather than listening for it. - There's no support in the VSCode plugin. I'd filed https://github.com/bazelbuild/vscode-bazel/issues/179 originally. A totally different approach that won't work with Bazel: [BEAR (Build EAR)](https://github.com/rizsotto/Bear) builds `compile_commands.json` by listening in on compiler invocations and records what it finds during a build. This lets it work across a variety of build systems...except Bazel, because it's hermeticity measures (keeping compilation as a pure function) screen out exactly the type of tinkering BEAR tries to do. It might be doable, but it would need more injection work, probably, than we'd want, and a build to listen to. See Bazel marked "wontfix" [here](https://github.com/rizsotto/Bear/issues/170). From d7b365f58a4369871cd5f201b70a690fbfcc568e Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Thu, 16 Mar 2023 21:26:21 -0700 Subject: [PATCH 06/15] Tweaks/polish to #112 --- refresh.template.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 5520af5..75a9822 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -14,7 +14,8 @@ if sys.version_info < (3,6): sys.exit("\n\033[31mFATAL ERROR:\033[0m Python 3.6 or later is required. Please update!") # 3.6 backwards compatibility required by @zhanyong-wan in https://github.com/hedronvision/bazel-compile-commands-extractor/issues/111. - # 3.7 backwards compatibility required by @lummax in https://github.com/hedronvision/bazel-compile-commands-extractor/pull/27. Try to contact him before upgrading. + # 3.7 backwards compatibility required by @lummax in https://github.com/hedronvision/bazel-compile-commands-extractor/pull/27. + # ^ Try to contact before upgrading. # When adding things could be cleaner if we had a higher minimum version, please add a comment with MIN_PY=3.. # Similarly, when upgrading, please search for that MIN_PY= tag. @@ -100,8 +101,7 @@ def _get_bazel_cached_action_keys(): """Gets the set of actionKeys cached in bazel-out.""" action_cache_process = subprocess.run( ['bazel', 'dump', '--action_cache'], - # The capture_output argument was introduced in python3.7, so we use - # stdout and stderr for python3.6 compatibility. + # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), @@ -230,8 +230,7 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke header_search_process = _subprocess_run_spilling_over_to_param_file_if_needed( # Note: gcc/clang can be run from Windows, too. header_cmd, - # The capture_output argument was introduced in python3.7, so we use - # stdout and stderr for python3.6 compatibility. + # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), @@ -241,7 +240,7 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke # Tolerate failure gracefully--during editing the code may not compile! if header_search_process.stderr: _print_header_finding_warning_once() - print(header_search_process.stderr, file=sys.stderr, end='') # Captured with stdout/stderr and dumped explicitly to avoid interlaced output. + print(header_search_process.stderr, file=sys.stderr, end='') # Stderr captured and dumped atomically to avoid interlaced output. if not header_search_process.stdout: # Worst case, we couldn't get the headers, return set() @@ -889,8 +888,7 @@ def _get_commands(target: str, flags: str): aquery_process = subprocess.run( aquery_args, - # The capture_output argument was introduced in python3.7, so we use - # stdout and stderr for python3.6 compatibility. + # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), From 354105798f43a6707595e14f6659c9292189434f Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Thu, 16 Mar 2023 22:08:17 -0700 Subject: [PATCH 07/15] Punctuation nit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f7a353c..7152af1 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,7 @@ If you're using another editor, you'll need to follow the same rough steps as ab Once you've succeeded in setting up another editor—or set up `clang-tidy`, or otherwise seen anything that might improve this readme—we'd love it if you'd give back and contribute what you know! Just edit this `README.md` on GitHub and file a PR :) -## "Smooth Edges" — what we've enjoyed using this for. +## "Smooth Edges" — what we've enjoyed using this for You should now be all set to go! Way to make it through setup. From a8be2f874ceea265724d9581f3d0c9b58a9ba73f Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Thu, 16 Mar 2023 22:49:35 -0700 Subject: [PATCH 08/15] fix typo --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 75a9822..129dd0e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -156,7 +156,7 @@ def _parse_headers_from_makefile_deps(d_file_content: str, source_path_for_sanit @functools.lru_cache(maxsize=None) def _get_cached_adjusted_modified_time(path: str): - """Get (and cache!) the modified time of a file, slighly adjusted for easy comparison. + """Get (and cache!) the modified time of a file, slightly adjusted for easy comparison. This is primarily intended to check whether header include caches are fresh. From c4f71185b77e4945d536ee0ef8288419f28be67e Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Fri, 17 Mar 2023 18:37:13 -0700 Subject: [PATCH 09/15] Improve warning for no entries because of errors in build files. --- refresh.template.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 129dd0e..f5d5b9c 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -897,16 +897,12 @@ def _get_commands(target: str, flags: str): # Filter aquery error messages to just those the user should care about. + # Shush known warnings about missing graph targets. + # The missing graph targets are not things we want to introspect anyway. + # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. - for line in aquery_process.stderr.splitlines(): - # Shush known warnings about missing graph targets. - # The missing graph targets are not things we want to introspect anyway. - # Tracking issue https://github.com/bazelbuild/bazel/issues/13007. - if missing_targets_warning.match(line): - continue - - print(line, file=sys.stderr) - + aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) + if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) # Parse proto output from aquery try: @@ -918,7 +914,11 @@ def _get_commands(target: str, flags: str): return if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - log_warning(f""">>> Bazel lists no applicable compile commands for {target} + if aquery_process.stderr: + log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. + Continuing gracefully...""") + else: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). Continuing gracefully...""") return From 143514e235562a081cfc03d0e5cdee1b1fef14b7 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Fri, 17 Mar 2023 19:19:51 -0700 Subject: [PATCH 10/15] Minor cleanup: unnecessary triple quotes --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index f5d5b9c..a1c80b1 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1041,7 +1041,7 @@ def _ensure_cwd_is_workspace_root(): try: workspace_root = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY']) except KeyError: - log_error(""">>> BUILD_WORKSPACE_DIRECTORY was not found in the environment. Make sure to invoke this tool with `bazel run`.""") + log_error(">>> BUILD_WORKSPACE_DIRECTORY was not found in the environment. Make sure to invoke this tool with `bazel run`.") sys.exit(1) # Change the working directory to the workspace root (assumed by future commands). # Although this can fail (OSError/FileNotFoundError/PermissionError/NotADirectoryError), there's no easy way to recover, so we'll happily crash. From f7388651ee99608fb5f6336764657596e2f84b97 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Fri, 17 Mar 2023 19:47:36 -0700 Subject: [PATCH 11/15] Abort if output will be empty See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/109 for context --- refresh.template.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/refresh.template.py b/refresh.template.py index a1c80b1..03f28ec 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1063,6 +1063,11 @@ def _ensure_cwd_is_workspace_root(): for (target, flags) in target_flag_pairs: compile_command_entries.extend(_get_commands(target, flags)) + if not compile_command_entries: + log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use. + There should be actionable warnings, above, that led to this.""") + sys.exit(1) + # Chain output into compile_commands.json with open('compile_commands.json', 'w') as output_file: json.dump( From c13cf115c772fb2647d86f8d6a43221aa21ee322 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 23 Dec 2022 23:41:18 +0800 Subject: [PATCH 12/15] file based filter --- refresh.template.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 03f28ec..f37618c 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -835,7 +835,9 @@ def _get_commands(target: str, flags: str): # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") - additional_flags = shlex.split(flags) + sys.argv[1:] + additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] + file_flags = [arg for arg in sys.argv[1:] if arg.startswith('--file=')] + assert len(file_flags) < 2, f"Only one or zero --file is supported current args = {sys.argv[1:]}" # Detect anything that looks like a build target in the flags, and issue a warning. # Note that positional arguments after -- are all interpreted as target patterns. (If it's at the end, then no worries.) @@ -858,6 +860,18 @@ def _get_commands(target: str, flags: str): if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" + if (len(file_flags) == 1): + # Strip --file= + file_path = file_flags[0][7:] + # Query escape + file_path = file_path.replace("+", "\+").replace("-", "\-") + # For header file we try to find from hdrs and srcs to get the targets + if file_path.endswith('.h'): + # Since attr function can't query with full path, get the file name to query + head, tail = os.path.split(file_path) + target_statment = f"attr(hdrs, '{tail}', {target_statment}) + attr(srcs, '{tail}', {target_statment})" + else: + target_statment = f"inputs('{file_path}', {target_statment})" aquery_args = [ 'bazel', 'aquery', From 0ea3fd662a54bdb835107f65850f9fb0bf737d8c Mon Sep 17 00:00:00 2001 From: Christopher Peterson Sauer Date: Fri, 23 Dec 2022 16:53:22 -0800 Subject: [PATCH 13/15] Unify --file parsing logic Promote multiple --file assert to log_error Why? It's a usage error we're communicating to the user, not a code invariant we're checking in debug mode. We could recover, but we don't, because we expect only automated usage and want to give clear feedback. More pythonic if Improve my --file flag messaging consistency Call library to escape regex Helps avoid edge cases! Also handles the most common special character, '.' Add an error to catch unsupported --file form Tweak -- logic now that --file arguments could be after --file docs Make aquery spacing consistent Flip the if statement in constructor target_statment Use let to combine attr query Merge compile_commands.json if --file Small simplification: endswith already can take a tuple of possibilities (as elsewhere) Small simplification of header path for file flag. (This code section likely to be replaced later, though.) Fix my typo on --file= discoverable docs Some tweaks to merge Adds note about behavior, some edge case handling around other flags that might start with --file, permissions issues, etc. --- refresh.template.py | 49 +++++++++++++++++++++++------------- refresh_compile_commands.bzl | 2 ++ 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index f37618c..4b16402 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -3,8 +3,9 @@ Interface (after template expansion): - `bazel run` to regenerate compile_commands.json, so autocomplete (and any other clang tooling!) reflect the latest Bazel build files. - - No arguments are needed; info from the rule baked into the template expansion. + - No arguments are needed; info from the rule is baked into the template expansion. - Any arguments passed are interpreted as arguments needed for the builds being analyzed. + - The one exception is --file=, which can be used to update commands for just one file. This is intended for programmatic use from editor plugins. - Requires being run under Bazel so we can access the workspace root environment variable. - Output: a compile_commands.json in the workspace root that clang tooling (or you!) can look at to figure out how files are being compiled by Bazel - Crucially, this output is de-Bazeled; The result is a command that could be run from the workspace root directly, with no Bazel-specific requirements, environment variables, etc. @@ -835,14 +836,20 @@ def _get_commands(target: str, flags: str): # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") + # Pass along all arguments to aquery, except for --file= additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] - file_flags = [arg for arg in sys.argv[1:] if arg.startswith('--file=')] - assert len(file_flags) < 2, f"Only one or zero --file is supported current args = {sys.argv[1:]}" + file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] + if len(file_flags) > 1: + log_error(">>> At most one --file flag is supported.") + sys.exit(1) + if any(arg.startswith('--file') for arg in additional_flags): + log_error(">>> Only the --file= form is supported.") + sys.exit(1) # Detect anything that looks like a build target in the flags, and issue a warning. - # Note that positional arguments after -- are all interpreted as target patterns. (If it's at the end, then no worries.) + # Note that positional arguments after -- are all interpreted as target patterns. # And that we have to look for targets. checking for a - prefix is not enough. Consider the case of `-c opt` leading to a false positive - if ('--' in additional_flags[:-1] + if ('--' in additional_flags or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): log_warning(""">>> The flags you passed seem to contain targets. Try adding them as targets in your refresh_compile_commands rather than flags. @@ -860,25 +867,22 @@ def _get_commands(target: str, flags: str): if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" - if (len(file_flags) == 1): - # Strip --file= - file_path = file_flags[0][7:] - # Query escape - file_path = file_path.replace("+", "\+").replace("-", "\-") - # For header file we try to find from hdrs and srcs to get the targets - if file_path.endswith('.h'): - # Since attr function can't query with full path, get the file name to query - head, tail = os.path.split(file_path) - target_statment = f"attr(hdrs, '{tail}', {target_statment}) + attr(srcs, '{tail}', {target_statment})" + if file_flags: + file_path = file_flags[0] + if file_path.endswith(_get_files.source_extensions): + target_statment = f"inputs('{re.escape(file_path)}', {target_statment})" else: - target_statment = f"inputs('{file_path}', {target_statment})" + # For header files we try to find from hdrs and srcs to get the targets + # Since attr function can't query with full path, get the file name to query + fname = os.path.basename(file_path) + target_statment = f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" aquery_args = [ 'bazel', 'aquery', # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. - f"mnemonic('(Objc|Cpp)Compile',{target_statment})", + f"mnemonic('(Objc|Cpp)Compile', {target_statment})", # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. '--output=jsonproto', # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. @@ -1081,6 +1085,17 @@ def _ensure_cwd_is_workspace_root(): log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use. There should be actionable warnings, above, that led to this.""") sys.exit(1) + # --file triggers incremental update of compile_commands.json + if any(arg.startswith('--file=') for arg in sys.argv[1:]) and os.path.isfile('compile_commands.json'): + previous_compile_command_entries = [] + try: + with open('compile_commands.json') as compile_commands_file: + previous_compile_command_entries = json.load(compile_commands_file) + except: + log_warning(">>> Couldn't read previous compile_commands.json. Overwriting instead of merging...") + else: + updated_files = set(entry['file'] for entry in compile_command_entries) + compile_command_entries += [entry for entry in previous_compile_command_entries if entry['file'] not in updated_files] # Chain output into compile_commands.json with open('compile_commands.json', 'w') as output_file: diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index b9c5d32..70cf8f5 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -49,6 +49,8 @@ refresh_compile_commands( # exclude_headers = "external", # Still not fast enough? # Make sure you're specifying just the targets you care about by setting `targets`, above. + # That's still not enough; I'm working on a huge codebase! + # This tool supports a fast, incremental mode that can be used to add/update commands as individual files are opened. If you'd be willing to collaborate on writing a simple editor plugin invokes this tool on file open, please write in! (And see --file flag in refresh.template.py) ``` """ From 4244747c788fea5dd99a012aa3e5dcda584bb7b1 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sun, 19 Mar 2023 03:37:40 +0800 Subject: [PATCH 14/15] file based filter: deal with header lib --- refresh.template.py | 172 +++++++++++++++++++++++++------------------- 1 file changed, 98 insertions(+), 74 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 4b16402..57f257e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -832,7 +832,75 @@ def _convert_compile_commands(aquery_output): def _get_commands(target: str, flags: str): - """Yields compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" + """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" + def _get_commands(target_statment): + aquery_args = [ + 'bazel', + 'aquery', + # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html + # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto + # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. + f"mnemonic('(Objc|Cpp)Compile', {target_statment})", + # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. + '--output=jsonproto', + # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. + '--include_artifacts=false', + # Shush logging. Just for readability. + '--ui_event_filters=-info', + '--noshow_progress', + # Disable param files, which would obscure compile actions + # Mostly, people enable param files on Windows to avoid the relatively short command length limit. + # For more, see compiler_param_file in https://bazel.build/docs/windows + # They are, however, technically supported on other platforms/compilers. + # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. + # Since clangd has no such length limit, we'll disable param files for our aquery run. + '--features=-compiler_param_file', + # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation + # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 + # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. + # If Bazel starts supporting modules (https://github.com/bazelbuild/bazel/issues/4005), we'll probably need to make changes that subsume this. + '--features=-layering_check', + ] + additional_flags + + aquery_process = subprocess.run( + aquery_args, + # MIN_PY=3.7: Replace PIPEs with capture_output. + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding=locale.getpreferredencoding(), + check=False, # We explicitly ignore errors from `bazel aquery` and carry on. + ) + + + # Filter aquery error messages to just those the user should care about. + # Shush known warnings about missing graph targets. + # The missing graph targets are not things we want to introspect anyway. + # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 + missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. + aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) + if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) + + # Parse proto output from aquery + try: + # object_hook -> SimpleNamespace allows object.member syntax, like a proto, while avoiding the protobuf dependency + parsed_aquery_output = json.loads(aquery_process.stdout, object_hook=lambda d: types.SimpleNamespace(**d)) + except json.JSONDecodeError: + print("Bazel aquery failed. Command:", aquery_args, file=sys.stderr) + log_warning(f">>> Failed extracting commands for {target}\n Continuing gracefully...") + return [] + + if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) + if aquery_process.stderr: + log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. + Continuing gracefully...""") + else: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") + return [] + + return _convert_compile_commands(parsed_aquery_output) + # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") @@ -862,90 +930,46 @@ def _get_commands(target: str, flags: str): Try adding them as flags in your refresh_compile_commands rather than targets. In a moment, Bazel will likely fail to parse.""") + compile_commands = [] # First, query Bazel's C-family compile actions for that configured target target_statment = f'deps({target})' - if {exclude_external_sources}: - # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statment = f"filter('^(//|@//)',{target_statment})" + if file_flags: file_path = file_flags[0] - if file_path.endswith(_get_files.source_extensions): - target_statment = f"inputs('{re.escape(file_path)}', {target_statment})" + found = False + target_statment_canidates = [] + if file_flags[0].endswith(_get_files.source_extensions): + target_statment_canidates.append(f"inputs('{re.escape(file_path)}', {target_statment})") else: - # For header files we try to find from hdrs and srcs to get the targets - # Since attr function can't query with full path, get the file name to query fname = os.path.basename(file_path) - target_statment = f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" - aquery_args = [ - 'bazel', - 'aquery', - # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html - # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto - # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. - f"mnemonic('(Objc|Cpp)Compile', {target_statment})", - # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. - '--output=jsonproto', - # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. - '--include_artifacts=false', - # Shush logging. Just for readability. - '--ui_event_filters=-info', - '--noshow_progress', - # Disable param files, which would obscure compile actions - # Mostly, people enable param files on Windows to avoid the relatively short command length limit. - # For more, see compiler_param_file in https://bazel.build/docs/windows - # They are, however, technically supported on other platforms/compilers. - # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. - # Since clangd has no such length limit, we'll disable param files for our aquery run. - '--features=-compiler_param_file', - # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation - # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 - # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. - # If Bazel starts supporting modules (https://github.com/bazelbuild/bazel/issues/4005), we'll probably need to make changes that subsume this. - '--features=-layering_check', - ] + additional_flags - - aquery_process = subprocess.run( - aquery_args, - # MIN_PY=3.7: Replace PIPEs with capture_output. - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding=locale.getpreferredencoding(), - check=False, # We explicitly ignore errors from `bazel aquery` and carry on. - ) - - - # Filter aquery error messages to just those the user should care about. - # Shush known warnings about missing graph targets. - # The missing graph targets are not things we want to introspect anyway. - # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 - missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. - aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) - if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) - - # Parse proto output from aquery - try: - # object_hook -> SimpleNamespace allows object.member syntax, like a proto, while avoiding the protobuf dependency - parsed_aquery_output = json.loads(aquery_process.stdout, object_hook=lambda d: types.SimpleNamespace(**d)) - except json.JSONDecodeError: - print("Bazel aquery failed. Command:", aquery_args, file=sys.stderr) - log_warning(f">>> Failed extracting commands for {target}\n Continuing gracefully...") - return - - if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - if aquery_process.stderr: - log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. - Continuing gracefully...""") - else: + target_statment_canidates.extend([ + f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)", + f"inputs('{re.escape(file_path)}', {target_statment})", + f'deps({target})', + ]) + + for target_statment in target_statment_canidates: + compile_commands.extend( _get_commands(target_statment)) + if any(command['file'].endswith(file_path) for command in reversed(compile_commands)): + found = True + break + if not found: + log_warning(f""">>> Bazel lists no applicable compile commands for {file_path} in {target}. + Continuing gracefully...""") + else: + if {exclude_external_sources}: + # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. + target_statment = f"filter('^(//|@//)',{target_statment})" + compile_commands.extend(_get_commands(target_statment)) + if len(compile_commands) == 0: log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") - return - - yield from _convert_compile_commands(parsed_aquery_output) + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") # Log clear completion messages log_success(f">>> Finished extracting commands for {target}") + return compile_commands def _ensure_external_workspaces_link_exists(): From 0befc0907e28860649ca485806573d4c5ccf8139 Mon Sep 17 00:00:00 2001 From: snorlax Date: Fri, 3 Mar 2023 21:50:59 +0800 Subject: [PATCH 15/15] End loop if header is found & Add max attempts limit --- refresh.template.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 57f257e..e8b2dd6 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -37,6 +37,8 @@ import time import types import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> list[str] +import threading +import itertools @enum.unique @@ -182,6 +184,7 @@ def _get_cached_adjusted_modified_time(path: str): # Roughly 1 year into the future. This is safely below bazel's 10 year margin, but large enough that no sane normal file should be past this. BAZEL_INTERNAL_SOURCE_CUTOFF = time.time() + 60*60*24*365 +BAZEL_INTERNAL_MAX_HEADER_SEARCH_COUNT = 500 def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_key: str): """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.) @@ -766,11 +769,15 @@ def _all_platform_patch(compile_args: typing.List[str]): return compile_args -def _get_cpp_command_for_files(compile_action): +def _get_cpp_command_for_files(args): """Reformat compile_action into a compile command clangd can understand. Undo Bazel-isms and figures out which files clangd should apply the command to. """ + (compile_action, event, should_stop_lambda) = args + if event.is_set(): + return set(), set(), [] + # Patch command by platform compile_action.arguments = _all_platform_patch(compile_action.arguments) compile_action.arguments = _apple_platform_patch(compile_action.arguments) @@ -778,10 +785,12 @@ def _get_cpp_command_for_files(compile_action): source_files, header_files = _get_files(compile_action) + if not event.is_set() and should_stop_lambda(source_files, header_files): + event.set() return source_files, header_files, compile_action.arguments -def _convert_compile_commands(aquery_output): +def _convert_compile_commands(aquery_output, should_stop_lambda): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) compile actions for a given build. @@ -805,8 +814,8 @@ def _convert_compile_commands(aquery_output): with concurrent.futures.ThreadPoolExecutor( max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: - outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) - + event = threading.Event() + outputs = threadpool.map(_get_cpp_command_for_files, map(lambda action: (action, event, should_stop_lambda), aquery_output.actions)) # Yield as compile_commands.json entries header_files_already_written = set() for source_files, header_files, compile_command_args in outputs: @@ -833,7 +842,19 @@ def _convert_compile_commands(aquery_output): def _get_commands(target: str, flags: str): """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" - def _get_commands(target_statment): + lock = threading.RLock() + counter = itertools.count() + def _should_stop(headers, file_path): + if file_path: + with lock: + tried_count = next(counter) + if tried_count >= BAZEL_INTERNAL_MAX_HEADER_SEARCH_COUNT: + log_warning(f""">>> Bazel lists no applicable compile commands for {file_path} in {target} under {tried_count} Attempt.""") + return True + return any(header.endswith(file_path) for header in headers) + return False + + def _get_commands(target_statment, file_path): aquery_args = [ 'bazel', 'aquery', @@ -899,7 +920,7 @@ def _get_commands(target_statment): Continuing gracefully...""") return [] - return _convert_compile_commands(parsed_aquery_output) + return _convert_compile_commands(parsed_aquery_output, lambda _, headers: _should_stop(headers, file_path)) # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") @@ -949,7 +970,7 @@ def _get_commands(target_statment): ]) for target_statment in target_statment_canidates: - compile_commands.extend( _get_commands(target_statment)) + compile_commands.extend( _get_commands(target_statment, file_path)) if any(command['file'].endswith(file_path) for command in reversed(compile_commands)): found = True break @@ -960,7 +981,7 @@ def _get_commands(target_statment): if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" - compile_commands.extend(_get_commands(target_statment)) + compile_commands.extend(_get_commands(target_statment, None)) if len(compile_commands) == 0: log_warning(f""">>> Bazel lists no applicable compile commands for {target} If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md).