Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update convex hull code #789

Closed
wants to merge 12 commits into from
Previous commit
Next commit
revert old code changes in split_catalog_using_map.py
  • Loading branch information
christinawlindberg committed May 26, 2023
commit c25fe423475f04ffc8fa04dc0502b7ac5a78d4e2
189 changes: 143 additions & 46 deletions beast/tools/split_catalog_using_map.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
#!/usr/bin/env python3
"""
Split a simulated catalog and a set of AST results by number of sources
(to improve fitting efficiency for large grids).
Split a catalog and a set of AST results, by source or background
density bin. Uses one of the maps created by
'create_background_density_map'. From the split AST catalog, individual
noise models for different regions can be made, which can then be used
to fit the stars of the observed catalog which also fall in those
regions.

"""
import argparse
import numpy as np
from astropy.table import Table
from beast.tools.density_map import BinnedDensityMap

from beast.tools import beast_settings


def split_main(
beast_settings_info,
catfile,
astfile,
n_per_file=1000,
mapfile,
n_per_file=6250,
min_n_subfile=None,
sort_col="F475W_RATE",
):
@@ -32,6 +41,9 @@ def split_main(
astfile : string
name of the ast catalog file

mapfile : string
background or source density map file

n_per_file : int or None (default=6250)
If set, divide the split catalog into sub-catalogs with length
n_per_file. Good for photometry, not useful for ASTs.
@@ -49,18 +61,57 @@ def split_main(

"""

# process beast settings info
if isinstance(beast_settings_info, str):
settings = beast_settings.beast_settings(beast_settings_info)
elif isinstance(beast_settings_info, beast_settings.beast_settings):
settings = beast_settings_info
else:
raise TypeError(
"beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
)

# Create a binned density map, so both the observed and the ast
# catalog can be split using a consistent grouping (= binning) of
# the tiles

if all(
hasattr(settings, attr)
for attr in ["sd_binmode", "sd_Nbins", "sd_binwidth", "sd_custom"]
):
bdm = BinnedDensityMap.create(
mapfile,
bin_mode=settings.sd_binmode,
N_bins=settings.sd_Nbins,
bin_width=settings.sd_binwidth,
custom_bins=settings.sd_custom,
)
else:
raise RuntimeError(
"You need to specify all source density binning parameters (sd_binmode, sd_Nbins, sd_binwidth, sd_custom) in the beast_settings file. Unused parameters should be set to 'None'."
)

print("Splitting catalog")
split_simulated_catalog(
split_catalog_using_map(
catfile,
bdm,
n_per_file=n_per_file,
min_n_subfile=min_n_subfile,
sort_col=sort_col,
)
print("")
print("Splitting ASTs")
split_catalog_using_map(
astfile, bdm, ra_colname="RA_J2000", dec_colname="DEC_J2000", n_per_file=None
)


def split_simulated_catalog(
def split_catalog_using_map(
catfile,
n_per_file=1000,
binned_density_map,
ra_colname="RA",
dec_colname="DEC",
n_per_file=6250,
min_n_subfile=None,
sort_col="F475W_RATE",
):
@@ -72,6 +123,12 @@ def split_simulated_catalog(
catfile : string
name of the photometry catalog file

binned_density_map : BinnedDensityMap object
the binned density map for the field

ra_colname, dec_colname : string
labels for the RA and DEC columns

n_per_file : int or None (default=6250)
If set, divide the split catalog into sub-catalogs with length
n_per_file. Good for photometry, not useful for ASTs.
@@ -90,58 +147,96 @@ def split_simulated_catalog(
"""
cat = Table.read(catfile)

# write out sub-files, if chosen
if (n_per_file is not None) or (min_n_subfile is not None):

# calculate number of subfiles and number of stars per file
# - only n_per_file set
if (n_per_file is not None) and (min_n_subfile is None):
tot_subfiles = int(np.ceil(len(cat) / n_per_file))
curr_n_per_file = n_per_file
# - only min_n_subfile set
if (n_per_file is None) and (min_n_subfile is not None):
tot_subfiles = min_n_subfile
curr_n_per_file = int(np.ceil(len(cat) / tot_subfiles))
# - both are set: make sure the largest number of subfiles is used
if (n_per_file is not None) and (min_n_subfile is not None):
temp_tot_subfiles = int(np.ceil(len(cat) / n_per_file))
# n_per_file makes at least min_n_subfile -> use value from n_per_file
if min_n_subfile <= temp_tot_subfiles:
tot_subfiles = temp_tot_subfiles
ras = cat[ra_colname]
decs = cat[dec_colname]

bin_foreach_source = np.zeros(len(cat), dtype=int)
for i in range(len(cat)):
bin_foreach_source[i] = binned_density_map.bin_for_position(ras[i], decs[i])

binnrs = np.unique(bin_foreach_source)

for b in binnrs:
# write out file for this bin
sources_for_bin = np.where(bin_foreach_source == b)
print("bin {0}: {1} sources".format(b, len(sources_for_bin[0])))
subcat = cat[sources_for_bin]
subcat.write(catfile.replace(".fits", "_bin{}.fits".format(b)), overwrite=True)

# write out sub-files, if chosen
if (n_per_file is not None) or (min_n_subfile is not None):

# calculate number of subfiles and number of stars per file
# - only n_per_file set
if (n_per_file is not None) and (min_n_subfile is None):
tot_subfiles = int(np.ceil(len(sources_for_bin[0]) / n_per_file))
curr_n_per_file = n_per_file
# n_per_file doesn't make enough subfiles -> use min_n_subfile
else:
# - only min_n_subfile set
if (n_per_file is None) and (min_n_subfile is not None):
tot_subfiles = min_n_subfile
curr_n_per_file = int(
np.ceil(len(cat) / tot_subfiles)
)
curr_n_per_file = int(np.ceil(len(sources_for_bin[0]) / tot_subfiles))
# - both are set: make sure the largest number of subfiles is used
if (n_per_file is not None) and (min_n_subfile is not None):
temp_tot_subfiles = int(np.ceil(len(sources_for_bin[0]) / n_per_file))
# n_per_file makes at least min_n_subfile -> use value from n_per_file
if min_n_subfile <= temp_tot_subfiles:
tot_subfiles = temp_tot_subfiles
curr_n_per_file = n_per_file
# n_per_file doesn't make enough subfiles -> use min_n_subfile
else:
tot_subfiles = min_n_subfile
curr_n_per_file = int(
np.ceil(len(sources_for_bin[0]) / tot_subfiles)
)

print(
"dividing into "
+ str(tot_subfiles)
+ " subfiles for later fitting speed"
)
print(
"dividing into "
+ str(tot_subfiles)
+ " subfiles for later fitting speed"
)

# Sort the stars