revert old code changes in split_catalog_using_map.py

BEAST-Fitting · christinawlindberg · Jun 24, 2022 · May 26, 2023 · May 26, 2023 · May 26, 2023
commit c25fe423475f04ffc8fa04dc0502b7ac5a78d4e2
diff --git a/beast/tools/split_catalog_using_map.py b/beast/tools/split_catalog_using_map.py
@@ -1,18 +1,27 @@
 #!/usr/bin/env python3
 """
-Split a simulated catalog and a set of AST results by number of sources
-(to improve fitting efficiency for large grids).
+Split a catalog and a set of AST results, by source or background
+density bin. Uses one of the maps created by
+'create_background_density_map'. From the split AST catalog, individual
+noise models for different regions can be made, which can then be used
+to fit the stars of the observed catalog that fall in those same
+regions.
+
 """
 import argparse
 import numpy as np
 from astropy.table import Table
+from beast.tools.density_map import BinnedDensityMap
+
+from beast.tools import beast_settings
 
 
 def split_main(
  beast_settings_info,
  catfile,
  astfile,
- n_per_file=1000,
+ mapfile,
+ n_per_file=6250,
  min_n_subfile=None,
  sort_col="F475W_RATE",
 ):
@@ -32,6 +41,9 @@ def split_main(
  astfile : string
  name of the ast catalog file
 
+ mapfile : string
+ background or source density map file
+
  n_per_file : int or None (default=6250)
  If set, divide the split catalog into sub-catalogs with length
  n_per_file. Good for photometry, not useful for ASTs.
@@ -49,18 +61,57 @@ def split_main(
 
  """
 
+ # process beast settings info
+ if isinstance(beast_settings_info, str):
+ settings = beast_settings.beast_settings(beast_settings_info)
+ elif isinstance(beast_settings_info, beast_settings.beast_settings):
+ settings = beast_settings_info
+ else:
+ raise TypeError(
+ "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
+ )
+
+ # Create a binned density map, so both the observed and the ast
+ # catalog can be split using a consistent grouping (= binning) of
+ # the tiles
+
+ if all(
+ hasattr(settings, attr)
+ for attr in ["sd_binmode", "sd_Nbins", "sd_binwidth", "sd_custom"]
+ ):
+ bdm = BinnedDensityMap.create(
+ mapfile,
+ bin_mode=settings.sd_binmode,
+ N_bins=settings.sd_Nbins,
+ bin_width=settings.sd_binwidth,
+ custom_bins=settings.sd_custom,
+ )
+ else:
+ raise RuntimeError(
+ "You need to specify all source density binning parameters (sd_binmode, sd_Nbins, sd_binwidth, sd_custom) in the beast_settings file. Unused parameters should be set to 'None'."
+ )
+
  print("Splitting catalog")
- split_simulated_catalog(
+ split_catalog_using_map(
  catfile,
+ bdm,
  n_per_file=n_per_file,
  min_n_subfile=min_n_subfile,
  sort_col=sort_col,
  )
+ print("")
+ print("Splitting ASTs")
+ split_catalog_using_map(
+ astfile, bdm, ra_colname="RA_J2000", dec_colname="DEC_J2000", n_per_file=None
+ )
 
 
-def split_simulated_catalog(
+def split_catalog_using_map(
  catfile,
- n_per_file=1000,
+ binned_density_map,
+ ra_colname="RA",
+ dec_colname="DEC",
+ n_per_file=6250,
  min_n_subfile=None,
  sort_col="F475W_RATE",
 ):
@@ -72,6 +123,12 @@ def split_simulated_catalog(
  catfile : string
  name of the photometry catalog file
 
+ binned_density_map : BinnedDensityMap object
+ the binned density map for the field
+
+ ra_colname, dec_colname : string
+ labels for the RA and DEC columns
+
  n_per_file : int or None (default=6250)
  If set, divide the split catalog into sub-catalogs with length
  n_per_file. Good for photometry, not useful for ASTs.
@@ -90,58 +147,96 @@ def split_simulated_catalog(
  """
  cat = Table.read(catfile)
 
- # write out sub-files, if chosen
- if (n_per_file is not None) or (min_n_subfile is not None):
-
- # calculate number of subfiles and number of stars per file
- # - only n_per_file set
- if (n_per_file is not None) and (min_n_subfile is None):
- tot_subfiles = int(np.ceil(len(cat) / n_per_file))
- curr_n_per_file = n_per_file
- # - only min_n_subfile set
- if (n_per_file is None) and (min_n_subfile is not None):
- tot_subfiles = min_n_subfile
- curr_n_per_file = int(np.ceil(len(cat) / tot_subfiles))
- # - both are set: make sure the largest number of subfiles is used
- if (n_per_file is not None) and (min_n_subfile is not None):
- temp_tot_subfiles = int(np.ceil(len(cat) / n_per_file))
- # n_per_file makes at least min_n_subfile -> use value from n_per_file
- if min_n_subfile <= temp_tot_subfiles:
- tot_subfiles = temp_tot_subfiles
+ ras = cat[ra_colname]
+ decs = cat[dec_colname]
+
+ bin_foreach_source = np.zeros(len(cat), dtype=int)
+ for i in range(len(cat)):
+ bin_foreach_source[i] = binned_density_map.bin_for_position(ras[i], decs[i])
+
+ binnrs = np.unique(bin_foreach_source)
+
+ for b in binnrs:
+ # write out file for this bin
+ sources_for_bin = np.where(bin_foreach_source == b)
+ print("bin {0}: {1} sources".format(b, len(sources_for_bin[0])))
+ subcat = cat[sources_for_bin]
+ subcat.write(catfile.replace(".fits", "_bin{}.fits".format(b)), overwrite=True)
+
+ # write out sub-files, if chosen
+ if (n_per_file is not None) or (min_n_subfile is not None):
+
+ # calculate number of subfiles and number of stars per file
+ # - only n_per_file set
+ if (n_per_file is not None) and (min_n_subfile is None):
+ tot_subfiles = int(np.ceil(len(sources_for_bin[0]) / n_per_file))
  curr_n_per_file = n_per_file
- # n_per_file doesn't make enough subfiles -> use min_n_subfile
- else:
+ # - only min_n_subfile set
+ if (n_per_file is None) and (min_n_subfile is not None):
  tot_subfiles = min_n_subfile
- curr_n_per_file = int(
- np.ceil(len(cat) / tot_subfiles)
- )
+ curr_n_per_file = int(np.ceil(len(sources_for_bin[0]) / tot_subfiles))
+ # - both are set: make sure the largest number of subfiles is used
+ if (n_per_file is not None) and (min_n_subfile is not None):
+ temp_tot_subfiles = int(np.ceil(len(sources_for_bin[0]) / n_per_file))
+ # n_per_file makes at least min_n_subfile -> use value from n_per_file
+ if min_n_subfile <= temp_tot_subfiles:
+ tot_subfiles = temp_tot_subfiles
+ curr_n_per_file = n_per_file
+ # n_per_file doesn't make enough subfiles -> use min_n_subfile
+ else:
+ tot_subfiles = min_n_subfile
+ curr_n_per_file = int(
+ np.ceil(len(sources_for_bin[0]) / tot_subfiles)
+ )
 
- print(
- "dividing into "
- + str(tot_subfiles)
- + " subfiles for later fitting speed"
- )
+  print(
+  "dividing into "
+  + str(tot_subfiles)
+  + " subfiles for later fitting speed"
+  )
 
- # Sort the stars