MNT Applies black formatting to most of the code base (scikit-learn#1…
thomasjpfan committed Jun 17, 2021
1 parent 0e7761c commit 82df489
Showing 513 changed files with 59,810 additions and 42,580 deletions.
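
Black rewrites only code style, never behavior: single quotes become double quotes, and call sites that exceed the default 88-column limit are exploded onto one argument per line with a trailing comma, as the diffs below show. A minimal sketch of the transformation using black's Python API (assumes the black package is installed; the snippet is illustrative and not part of this commit):

    import black

    # A call site in the pre-commit style, too long for black's 88-column limit.
    src = (
        "estimator = KMeans(n_clusters=20, algorithm=algorithm, init=init, "
        "n_init=1, max_iter=max_iter, tol=-1, random_state=0)\n"
    )

    # black.format_str reformats a source string the same way `black file.py` would:
    # double quotes, and one argument per line with a trailing comma.
    print(black.format_str(src, mode=black.Mode()))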
.github/scripts/label_title_regex.py (10 changes: 2 additions & 8 deletions)

@@ -15,15 +15,9 @@
 title = issue.title
 
 
-regex_to_labels = [
-    (r"\bDOC\b", "Documentation"),
-    (r"\bCI\b", "Build / CI")
-]
+regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
 
-labels_to_add = [
-    label for regex, label in regex_to_labels
-    if re.search(regex, title)
-]
+labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
 
 if labels_to_add:
     issue.add_to_labels(*labels_to_add)
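
The reformatted one-liners behave exactly like the multi-line originals; for instance (the issue title below is made up purely for illustration):

    import re

    regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]

    # Hypothetical issue title, used only to show the matching logic.
    title = "DOC fix broken links in the CI guide"

    labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
    print(labels_to_add)  # ['Documentation', 'Build / CI']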
asv_benchmarks/benchmarks/cluster.py (82 changes: 43 additions & 39 deletions)

@@ -10,16 +10,16 @@ class KMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
     Benchmarks for KMeans.
     """
 
-    param_names = ['representation', 'algorithm', 'init']
-    params = (['dense', 'sparse'], ['full', 'elkan'], ['random', 'k-means++'])
+    param_names = ["representation", "algorithm", "init"]
+    params = (["dense", "sparse"], ["full", "elkan"], ["random", "k-means++"])
 
     def setup_cache(self):
         super().setup_cache()
 
     def make_data(self, params):
         representation, algorithm, init = params
 
-        if representation == 'sparse':
+        if representation == "sparse":
             data = _20newsgroups_highdim_dataset(n_samples=8000)
         else:
             data = _blobs_dataset(n_clusters=20)
@@ -29,44 +29,46 @@ def make_data(self, params):
     def make_estimator(self, params):
         representation, algorithm, init = params
 
-        max_iter = 30 if representation == 'sparse' else 100
+        max_iter = 30 if representation == "sparse" else 100
 
-        estimator = KMeans(n_clusters=20,
-                           algorithm=algorithm,
-                           init=init,
-                           n_init=1,
-                           max_iter=max_iter,
-                           tol=-1,
-                           random_state=0)
+        estimator = KMeans(
+            n_clusters=20,
+            algorithm=algorithm,
+            init=init,
+            n_init=1,
+            max_iter=max_iter,
+            tol=-1,
+            random_state=0,
+        )
 
         return estimator
 
     def make_scorers(self):
-        self.train_scorer = (
-            lambda _, __: neg_mean_inertia(self.X,
-                                           self.estimator.predict(self.X),
-                                           self.estimator.cluster_centers_))
-        self.test_scorer = (
-            lambda _, __: neg_mean_inertia(self.X_val,
-                                           self.estimator.predict(self.X_val),
-                                           self.estimator.cluster_centers_))
+        self.train_scorer = lambda _, __: neg_mean_inertia(
+            self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+        )
+        self.test_scorer = lambda _, __: neg_mean_inertia(
+            self.X_val,
+            self.estimator.predict(self.X_val),
+            self.estimator.cluster_centers_,
+        )
 
 
 class MiniBatchKMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
     """
     Benchmarks for MiniBatchKMeans.
     """
 
-    param_names = ['representation', 'init']
-    params = (['dense', 'sparse'], ['random', 'k-means++'])
+    param_names = ["representation", "init"]
+    params = (["dense", "sparse"], ["random", "k-means++"])
 
     def setup_cache(self):
         super().setup_cache()
 
     def make_data(self, params):
         representation, init = params
 
-        if representation == 'sparse':
+        if representation == "sparse":
             data = _20newsgroups_highdim_dataset()
         else:
             data = _blobs_dataset(n_clusters=20)
@@ -76,25 +78,27 @@ def make_data(self, params):
     def make_estimator(self, params):
         representation, init = params
 
-        max_iter = 5 if representation == 'sparse' else 2
+        max_iter = 5 if representation == "sparse" else 2
 
-        estimator = MiniBatchKMeans(n_clusters=20,
-                                    init=init,
-                                    n_init=1,
-                                    max_iter=max_iter,
-                                    batch_size=1000,
-                                    max_no_improvement=None,
-                                    compute_labels=False,
-                                    random_state=0)
+        estimator = MiniBatchKMeans(
+            n_clusters=20,
+            init=init,
+            n_init=1,
+            max_iter=max_iter,
+            batch_size=1000,
+            max_no_improvement=None,
+            compute_labels=False,
+            random_state=0,
+        )
 
         return estimator
 
     def make_scorers(self):
-        self.train_scorer = (
-            lambda _, __: neg_mean_inertia(self.X,
-                                           self.estimator.predict(self.X),
-                                           self.estimator.cluster_centers_))
-        self.test_scorer = (
-            lambda _, __: neg_mean_inertia(self.X_val,
-                                           self.estimator.predict(self.X_val),
-                                           self.estimator.cluster_centers_))
+        self.train_scorer = lambda _, __: neg_mean_inertia(
+            self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+        )
+        self.test_scorer = lambda _, __: neg_mean_inertia(
+            self.X_val,
+            self.estimator.predict(self.X_val),
+            self.estimator.cluster_centers_,
+        )
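
For readers unfamiliar with asv: `param_names` and `params` define a grid, and asv runs each benchmark once per combination of values, so the reformatted tuples above are purely cosmetic. A quick illustration (not part of the commit):

    from itertools import product

    # The KMeansBenchmark grid from the diff above; asv benchmarks every combination.
    params = (["dense", "sparse"], ["full", "elkan"], ["random", "k-means++"])
    for representation, algorithm, init in product(*params):
        print(representation, algorithm, init)  # 2 x 2 x 2 = 8 combinations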
asv_benchmarks/benchmarks/common.py (125 changes: 73 additions & 52 deletions)

@@ -14,86 +14,102 @@ def get_from_config():
     """Get benchmarks configuration from the config.json file"""
     current_path = Path(__file__).resolve().parent
 
-    config_path = current_path / 'config.json'
-    with open(config_path, 'r') as config_file:
-        config_file = ''.join(line for line in config_file
-                              if line and '//' not in line)
+    config_path = current_path / "config.json"
+    with open(config_path, "r") as config_file:
+        config_file = "".join(line for line in config_file if line and "//" not in line)
         config = json.loads(config_file)
 
-    profile = os.getenv('SKLBENCH_PROFILE', config['profile'])
+    profile = os.getenv("SKLBENCH_PROFILE", config["profile"])
 
-    n_jobs_vals_env = os.getenv('SKLBENCH_NJOBS')
+    n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
     if n_jobs_vals_env:
         n_jobs_vals = eval(n_jobs_vals_env)
     else:
-        n_jobs_vals = config['n_jobs_vals']
+        n_jobs_vals = config["n_jobs_vals"]
     if not n_jobs_vals:
         n_jobs_vals = list(range(1, 1 + cpu_count()))
 
-    cache_path = current_path / 'cache'
+    cache_path = current_path / "cache"
     cache_path.mkdir(exist_ok=True)
-    (cache_path / 'estimators').mkdir(exist_ok=True)
-    (cache_path / 'tmp').mkdir(exist_ok=True)
+    (cache_path / "estimators").mkdir(exist_ok=True)
+    (cache_path / "tmp").mkdir(exist_ok=True)
 
-    save_estimators = os.getenv('SKLBENCH_SAVE_ESTIMATORS',
-                                config['save_estimators'])
-    save_dir = os.getenv('ASV_COMMIT', 'new')[:8]
+    save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"])
+    save_dir = os.getenv("ASV_COMMIT", "new")[:8]
 
     if save_estimators:
-        (cache_path / 'estimators' / save_dir).mkdir(exist_ok=True)
+        (cache_path / "estimators" / save_dir).mkdir(exist_ok=True)
 
-    base_commit = os.getenv('SKLBENCH_BASE_COMMIT', config['base_commit'])
+    base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"])
 
-    bench_predict = os.getenv('SKLBENCH_PREDICT', config['bench_predict'])
-    bench_transform = os.getenv('SKLBENCH_TRANSFORM',
-                                config['bench_transform'])
+    bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"])
+    bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"])
 
-    return (profile, n_jobs_vals, save_estimators, save_dir, base_commit,
-            bench_predict, bench_transform)
+    return (
+        profile,
+        n_jobs_vals,
+        save_estimators,
+        save_dir,
+        base_commit,
+        bench_predict,
+        bench_transform,
+    )
 
 
 def get_estimator_path(benchmark, directory, params, save=False):
     """Get path of pickled fitted estimator"""
-    path = Path(__file__).resolve().parent / 'cache'
-    path = (path / 'estimators' / directory) if save else (path / 'tmp')
+    path = Path(__file__).resolve().parent / "cache"
+    path = (path / "estimators" / directory) if save else (path / "tmp")
 
-    filename = (benchmark.__class__.__name__
-                + '_estimator_' + '_'.join(list(map(str, params))) + '.pkl')
+    filename = (
+        benchmark.__class__.__name__
+        + "_estimator_"
+        + "_".join(list(map(str, params)))
+        + ".pkl"
+    )
 
     return path / filename
 
 
 def clear_tmp():
     """Clean the tmp directory"""
-    path = Path(__file__).resolve().parent / 'cache' / 'tmp'
+    path = Path(__file__).resolve().parent / "cache" / "tmp"
     for child in path.iterdir():
         child.unlink()
 
 
 class Benchmark(ABC):
     """Abstract base class for all the benchmarks"""
+
     timer = timeit.default_timer  # wall time
     processes = 1
     timeout = 500
 
-    (profile, n_jobs_vals, save_estimators, save_dir, base_commit,
-     bench_predict, bench_transform) = get_from_config()
-
-    if profile == 'fast':
+    (
+        profile,
+        n_jobs_vals,
+        save_estimators,
+        save_dir,
+        base_commit,
+        bench_predict,
+        bench_transform,
+    ) = get_from_config()
+
+    if profile == "fast":
         warmup_time = 0
         repeat = 1
         number = 1
         min_run_count = 1
-        data_size = 'small'
-    elif profile == 'regular':
+        data_size = "small"
+    elif profile == "regular":
         warmup_time = 1
         repeat = (3, 100, 30)
-        data_size = 'small'
-    elif profile == 'large_scale':
+        data_size = "small"
+    elif profile == "large_scale":
         warmup_time = 1
         repeat = 3
         number = 1
-        data_size = 'large'
+        data_size = "large"
 
     @property
     @abstractmethod
@@ -103,6 +119,7 @@ def params(self):
 
 class Estimator(ABC):
     """Abstract base class for all benchmarks of estimators"""
+
     @abstractmethod
     def make_data(self, params):
         """Return the dataset for a combination of parameters"""
@@ -112,8 +129,7 @@ def make_data(self, params):
 
     @abstractmethod
     def make_estimator(self, params):
-        """Return an instance of the estimator for a combination of parameters
-        """
+        """Return an instance of the estimator for a combination of parameters"""
        pass
 
    def skip(self, params):
@@ -137,9 +153,10 @@ def setup_cache(self):
 
             estimator.fit(X, y)
 
-            est_path = get_estimator_path(self, Benchmark.save_dir,
-                                          params, Benchmark.save_estimators)
-            with est_path.open(mode='wb') as f:
+            est_path = get_estimator_path(
+                self, Benchmark.save_dir, params, Benchmark.save_estimators
+            )
+            with est_path.open(mode="wb") as f:
                 pickle.dump(estimator, f)
 
     def setup(self, *params):
@@ -152,9 +169,10 @@ def setup(self, *params):
 
         self.X, self.X_val, self.y, self.y_val = self.make_data(params)
 
-        est_path = get_estimator_path(self, Benchmark.save_dir,
-                                      params, Benchmark.save_estimators)
-        with est_path.open(mode='rb') as f:
+        est_path = get_estimator_path(
+            self, Benchmark.save_dir, params, Benchmark.save_estimators
+        )
+        with est_path.open(mode="rb") as f:
             self.estimator = pickle.load(f)
 
         self.make_scorers()
@@ -166,14 +184,14 @@ def peakmem_fit(self, *args):
         self.estimator.fit(self.X, self.y)
 
     def track_train_score(self, *args):
-        if hasattr(self.estimator, 'predict'):
+        if hasattr(self.estimator, "predict"):
             y_pred = self.estimator.predict(self.X)
         else:
             y_pred = None
         return float(self.train_scorer(self.y, y_pred))
 
     def track_test_score(self, *args):
-        if hasattr(self.estimator, 'predict'):
+        if hasattr(self.estimator, "predict"):
             y_val_pred = self.estimator.predict(self.X_val)
         else:
             y_val_pred = None
@@ -182,18 +200,20 @@ def track_test_score(self, *args):
 
 class Predictor(ABC):
     """Abstract base class for benchmarks of estimators implementing predict"""
+
     if Benchmark.bench_predict:
+
         def time_predict(self, *args):
             self.estimator.predict(self.X)
 
         def peakmem_predict(self, *args):
             self.estimator.predict(self.X)
 
     if Benchmark.base_commit is not None:
+
         def track_same_prediction(self, *args):
-            est_path = get_estimator_path(self, Benchmark.base_commit,
-                                          args, True)
-            with est_path.open(mode='rb') as f:
+            est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+            with est_path.open(mode="rb") as f:
                 estimator_base = pickle.load(f)
 
             y_val_pred_base = estimator_base.predict(self.X_val)
@@ -208,20 +228,21 @@ def params(self):
 
 
 class Transformer(ABC):
-    """Abstract base class for benchmarks of estimators implementing transform
-    """
+    """Abstract base class for benchmarks of estimators implementing transform"""
+
     if Benchmark.bench_transform:
+
         def time_transform(self, *args):
             self.estimator.transform(self.X)
 
         def peakmem_transform(self, *args):
             self.estimator.transform(self.X)
 
     if Benchmark.base_commit is not None:
+
         def track_same_transform(self, *args):
-            est_path = get_estimator_path(self, Benchmark.base_commit,
-                                          args, True)
-            with est_path.open(mode='rb') as f:
+            est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+            with est_path.open(mode="rb") as f:
                 estimator_base = pickle.load(f)
 
             X_val_t_base = estimator_base.transform(self.X_val)
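
As a side note, the reformatted `get_estimator_path` still builds the same pickle filenames as before; a hypothetical example (class name and params invented for illustration):

    # Mirrors the filename expression in get_estimator_path above.
    params = ("dense", "full", "random")
    filename = "KMeansBenchmark" + "_estimator_" + "_".join(list(map(str, params))) + ".pkl"
    print(filename)  # KMeansBenchmark_estimator_dense_full_random.pkl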
(Diff truncated: the remaining 510 changed files are not shown here.)
