refresh.template.py: for --file, don't continue traversing targets af…

…ter the first command has been found
xinzhengzhang · May 5, 2023 · 0453be4 · 0453be4
1 parent 6a6777a
commit 0453be4
Showing 1 changed file with 92 additions and 86 deletions.
diff --git a/refresh.template.py b/refresh.template.py
@@ -942,95 +942,104 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq
  return _convert_compile_commands(parsed_aquery_output, focused_on_file)
 
 
-def _get_commands(target: str, flags: str):
- """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors."""
- log_info(f">>> Analyzing commands used in {target}")
-
- # Parse the --file= flag, if any, passing along all other arguments to aquery
- additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')]
- file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')]
- if len(file_flags) > 1:
- log_error(">>> At most one --file flag is supported.")
- sys.exit(1)
- if any(arg.startswith('--file') for arg in additional_flags):
- log_error(">>> Only the --file=<file_target> form is supported.")
- sys.exit(1)
+def _get_commands(pairs: list):
+ """Return compile_commands.json entries for a given list of target and flags, gracefully tolerating errors."""
+
+ all_compile_commands = []
+
+ for target, flags in pairs:
+ log_info(f">>> Analyzing commands used in {target}")
+ # Parse the --file= flag, if any, passing along all other arguments to aquery
+ additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')]
+ file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')]
+ if len(file_flags) > 1:
+ log_error(">>> At most one --file flag is supported.")
+ sys.exit(1)
+ if any(arg.startswith('--file') for arg in additional_flags):
+ log_error(">>> Only the --file=<file_target> form is supported.")
+ sys.exit(1)
+
+
+ # Screen the remaining flags for obvious issues to help people debug.
+
+ # Detect anything that looks like a build target in the flags, and issue a warning.
+ # Note that positional arguments after -- are all interpreted as target patterns.
+ # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive.
+ if ('--' in additional_flags
+ or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)):
+ log_warning(""">>> The flags you passed seem to contain targets.
+ Try adding them as targets in your refresh_compile_commands rather than flags.
+ [Specifying targets at runtime isn't supported yet, and in a moment, Bazel will likely fail to parse without our help. If you need to be able to specify targets at runtime, and can't easily just add them to your refresh_compile_commands, please open an issue or file a PR. You may also want to refer to https://github.com/hedronvision/bazel-compile-commands-extractor/issues/62.]""")
+
+ # Quick (imperfect) effort at detecting flags in the targets.
+ # Can't detect flags starting with -, because they could be subtraction patterns.
+ if any(target.startswith('--') for target in shlex.split(target)):
+ log_warning(""">>> The target you specified seems to contain flags.
+ Try adding them as flags in your refresh_compile_commands rather than targets.
+ In a moment, Bazel will likely fail to parse.""")
+
 
+ # Then, actually query Bazel's compile actions for that configured target
+ target_statement_candidates = []
+ file_path = None
 
- # Screen the remaining flags for obvious issues to help people debug.
-
- # Detect anything that looks like a build target in the flags, and issue a warning.
- # Note that positional arguments after -- are all interpreted as target patterns.
- # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive.
- if ('--' in additional_flags
- or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)):
- log_warning(""">>> The flags you passed seem to contain targets.
- Try adding them as targets in your refresh_compile_commands rather than flags.
- [Specifying targets at runtime isn't supported yet, and in a moment, Bazel will likely fail to parse without our help. If you need to be able to specify targets at runtime, and can't easily just add them to your refresh_compile_commands, please open an issue or file a PR. You may also want to refer to https://github.com/hedronvision/bazel-compile-commands-extractor/issues/62.]""")
-
- # Quick (imperfect) effort at detecting flags in the targets.
- # Can't detect flags starting with -, because they could be subtraction patterns.
- if any(target.startswith('--') for target in shlex.split(target)):
- log_warning(""">>> The target you specified seems to contain flags.
- Try adding them as flags in your refresh_compile_commands rather than targets.
- In a moment, Bazel will likely fail to parse.""")
-
-
- # Then, actually query Bazel's compile actions for that configured target
- target_statement_candidates = []
- file_path = None
- compile_commands = []
-
- if file_flags:
- file_path = file_flags[0]
- rel_path = os.path.relpath(file_path, os.getcwd())
- if not rel_path.startswith(".."):
- log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}")
- file_path = rel_path
-
- target_statement = f"deps('{target}')"
- if file_path.endswith(_get_files.source_extensions):
- target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})")
+ if file_flags:
+ file_path = file_flags[0]
+ rel_path = os.path.relpath(file_path, os.getcwd())
+ if not rel_path.startswith(".."):
+ log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}")
+ file_path = rel_path
+
+ target_statement = f"deps('{target}')"
+ if file_path.endswith(_get_files.source_extensions):
+ target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})")
+ else:
+ fname = os.path.basename(file_path)
+ label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split()
+ # TODO compatible with windows file path
+ file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates))
+ file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname
+
+ header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this.
+ target_statement_candidates.extend([
+ header_target_statement,
+ f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, <maybe some limited depth>) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below.
+ target_statement,
+ ])
  else:
- fname = os.path.basename(file_path)
- label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split()
- # TODO compatible with windows file path
- file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates))
- file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname
-
- header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this.
- target_statement_candidates.extend([
- header_target_statement,
- f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, <maybe some limited depth>) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below.
- target_statement,
- ])
- else:
- if {exclude_external_sources}:
- # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers.
- target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})")
-
- found = False
- for target_statement in target_statement_candidates:
- commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path)
- compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not.
+ if {exclude_external_sources}:
+ # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers.
+ target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})")
+
+ found = False
+ compile_commands = []
+ for target_statement in target_statement_candidates:
+ commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path)
+ compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not.
+ if file_flags:
+ if any(command.file.endswith(file_path) for command in commands):
+ found = True
+ break
+ log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement}
+ Continuing gracefully...""")
+
+ all_compile_commands.extend(compile_commands)
+
  if file_flags:
- if any(command.file.endswith(file_path) for command in commands):
- found = True
+ if found:
+ log_success(f">>> Finished extracting commands for {target} with --file {file_path}")
  break
- log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement}
- Continuing gracefully...""")
-
- if file_flags and not found:
- log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target}
- Continuing gracefully...""")
+ else:
+ log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target}
+ Continuing gracefully...""")
 
- if not compile_commands:
- log_warning(f""">>> Bazel lists no applicable compile commands for {target}
- If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md).
- Continuing gracefully...""")
+  if not compile_commands:
+  log_warning(f""">>> Bazel lists no applicable compile commands for {target}
+  If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md).
+  Continuing gracefully...""")
 
- log_success(f">>> Finished extracting commands for {target}")
- return compile_commands
+  log_success(f">>> Finished extracting commands for {target}")
+ return all_compile_commands
 
 
 def _ensure_external_workspaces_link_exists():
@@ -1156,16 +1165,13 @@ def _ensure_cwd_is_workspace_root():
  _ensure_gitignore_entries_exist()
  _ensure_external_workspaces_link_exists()
 
- # TODO for --file, don't continue traversing targets after the first command has been found. Probably push this looping and template expansion inside of _get_commands().
  target_flag_pairs = [
  # Begin: template filled by Bazel
  {target_flag_pairs}
  # End: template filled by Bazel
  ]
 
- compile_command_entries = []
- for (target, flags) in target_flag_pairs:
- compile_command_entries.extend(_get_commands(target, flags))
+ compile_command_entries = _get_commands(target_flag_pairs)
 
  if not compile_command_entries:
  log_error(">>> Not writing to compile_commands.json, since no commands were extracted.")