Updated BCS tutorial

sandialabs · Dec 23, 2022 · b37c8c5 · b37c8c5
1 parent 99229fc
commit b37c8c5
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 59 deletions.
diff --git a/PyUQTk/PyPCE/pce_tools.py b/PyUQTk/PyPCE/pce_tools.py
@@ -450,8 +450,8 @@ def UQTkBCS(pc_model, xdata, ydata, niter, eta, ntry=5, eta_folds=5,\
  for i in range(ntry):
  # Split the data
  ind_tr, ind_val = ind_split(nsam, 'trval', [ntrain, nsam - ntrain])
- qall_wb=xdata[ind_tr,:]
- yall_wb=ydata[ind_tr]
+ x_split=xdata[ind_tr,:]
+ y_split=ydata[ind_tr]
 
  if verbose>0:
  print("============ Split # %d / %d ============" % (i + 1, ntry))
@@ -467,27 +467,8 @@ def UQTkBCS(pc_model, xdata, ydata, niter, eta, ntry=5, eta_folds=5,\
  if verbose>1:
  print(mindex)
 
- # One run of BCS
- #UQTk array for samples - [#samples, #dimensions]
- samplepts = qall_wb
- sam_uqtk=uqtkarray.numpy2uqtk(np.asfortranarray(samplepts))
- # UQTk array for function f_evaluations - [#evaluations,]
- f_evaluations=yall_wb
- y = uqtkarray.numpy2uqtk(np.asfortranarray(f_evaluations))
- # Initial run of BCS
- weights, used = UQTkEvalBCS(pc_model, y, sam_uqtk, sigma, eta_opt, regparams, 0)
-
- # Coefficients in a numpy array
- c_k=np.zeros(pc_model.GetNumberPCTerms())
- for i in range(used.XSize()):
- c_k[i]=weights[i]
-
- # Obtain new multiindex with only terms selected by BCS
- mi_uqtk = uqtkarray.intArray2D(pc_model.GetNumberPCTerms(), sam_uqtk.YSize())
- pc_model.GetMultiIndex(mi_uqtk)
- used_mi_uqtk=uqtkarray.intArray2D()
- uqtkarray.subMatrix_row_int(mi_uqtk, used, used_mi_uqtk)
- used_mi_np=uqtkarray.uqtk2numpy(used_mi_uqtk)
+ # One run of BCS to obtain an array of coefficients and a new multiindex
+ c_k, used_mi_np = UQTkEvalBCS(pc_model, y_split, x_split, sigma, eta_opt, regparams, 0)
 
  # Custom 'cuts' by number of PC terms or by value of PC coefficients
  npcall = c_k.shape[0] # number of PC terms
@@ -496,7 +477,7 @@ def UQTkBCS(pc_model, xdata, ydata, niter, eta, ntry=5, eta_folds=5,\
  indhigh = np.arange(npcall)[np.abs(c_k) > pcf_thr] # indices of coefficients above the pcf_thr threshold
  if verbose>0:
  if indhigh.shape[0]<npcall:
- print(npcall-indhigh.shape[0],"coefficients below the magnitude threshold have been cut.")
+ print(npcall-indhigh.shape[0],"coefficients equal to zero or below the magnitude threshold have been cut.")
  indsort_ = np.abs(c_k[indhigh]).argsort()[::-1] # indices of largest to smallest coefficients
  if npccut==None:
  npccut=100000
@@ -545,7 +526,7 @@ def UQTkBCS(pc_model, xdata, ydata, niter, eta, ntry=5, eta_folds=5,\
  mindex_final = reduce(multidim_intersect, mi_selected)
  # If no intersection, add the constant term [not sure why]
  if mindex_final.shape[0] == 0:
- mindex_final = np.zeros((1, dim), dtype=int)
+ mindex_final = np.zeros((1, xdata.shape[1]), dtype=int)
 
  # create a pc object with the final multiindex
  mindex_final_uq=uqtkarray.intArray2D(mindex_final.shape[0], mindex_final.shape[1])
@@ -606,7 +587,7 @@ def UQTkOptimizeEta(pc_start, y, x, etas, niter, nfolds):
  for eta in etas:
 
  # Obtain coefficients through BCS
- pc_final, c_k = UQTkBCS(pc_start, x_tr, y_tr, y_tr, niter, eta)
+ pc_final, c_k = UQTkBCS(pc_start, x_tr, y_tr, niter, eta)
  # Evaluate the PCE at the validation points
  pce_evals = UQTkEvaluatePCE(pc_final, c_k, x_val)
 
@@ -623,16 +604,16 @@ def UQTkOptimizeEta(pc_start, y, x, etas, niter, nfolds):
  eta_opt = np.array(e_k).mean()
  return eta_opt
 ################################################################################
-def UQTkEvalBCS(pc_model, y, sam_uqtk, sigma, eta, regparams, verbose):
+def UQTkEvalBCS(pc_model, f_evaluations, samplepts, sigma, eta, regparams, verbose):
  """
  Perform one iteration of Bayesian compressive sensing
  Helper function for UQTkBCS
 
  Input:
  pc_model: PC object with information about the basis
- y: 1D UQTk array of function evaluations [#samples,]
- sam_uqtk: N-dimensional UQTk of array of samples [#samples, #dimensions]
- sigma: Inital noise variance we assume is in the data
+ f_evaluations: 1D NumPy array of function evaluations [#samples,]
+ sam_uqtk: N-dimensional NumPy array of samples [#samples, #dimensions]
+ sigma:  Inital noise variance we assume is in the data
  eta: Threshold for stopping the algorithm. Smaller values
  retain more nonzero coefficients.
  regparams: Regularization weights
@@ -644,8 +625,8 @@ def UQTkEvalBCS(pc_model, y, sam_uqtk, sigma, eta, regparams, verbose):
  verbose: Flag for optional print statements
 
  Output:
- weights: 1D UQTk array with PC coefficients at indices in used [#used,]
- used: 1D UQTk array with inidices of the sparse weights
+ c_k: 1D NumPy array of coefficients
+ used_mi_np: Multiindex with only terms selected by BCS
  """
  # Configure BCS parameters to defaults
  adaptive = 0 # Flag for adaptive CS, using a generative basis, set to 0 or 1
@@ -655,6 +636,11 @@ def UQTkEvalBCS(pc_model, y, sam_uqtk, sigma, eta, regparams, verbose):
 
  bcs_verbose = 0 # silence print statements
 
+ #UQTk array for samples - [#samples, #dimensions]
+ sam_uqtk=uqtkarray.numpy2uqtk(np.asfortranarray(samplepts))
+ # UQTk array for function f_evaluations - [#evaluations,]
+ y = uqtkarray.numpy2uqtk(np.asfortranarray(f_evaluations))
+
  # UQTk array for lambda_init
  lambda_init = regparams # Initial regularization weights, which will be updated through BCS.
  lam_uqtk=uqtkarray.dblArray1D(lambda_init.shape[0])
@@ -685,8 +671,20 @@ def UQTkEvalBCS(pc_model, y, sam_uqtk, sigma, eta, regparams, verbose):
  print("BCS has selected", used.XSize(), "basis terms out of",\
  pc_model.GetNumberPCTerms())
 
+ # Coefficients in a numpy array
+ c_k = np.zeros(pc_model.GetNumberPCTerms())
+ for i in range(used.XSize()):
+ c_k[i]=weights[i]
+
+ # Obtain new multiindex with only terms selected by BCS
+ mi_uqtk = uqtkarray.intArray2D(pc_model.GetNumberPCTerms(), sam_uqtk.YSize())
+ pc_model.GetMultiIndex(mi_uqtk)
+ used_mi_uqtk=uqtkarray.intArray2D()
+ uqtkarray.subMatrix_row_int(mi_uqtk, used, used_mi_uqtk)
+ used_mi_np=uqtkarray.uqtk2numpy(used_mi_uqtk)
+
  # Return coefficients and their locations with respect to the basis terms
- return weights, used
+ return c_k, used_mi_np
 
 ################################################################################
 def multidim_intersect(arr1, arr2):

diff --git a/examples/surrogate_genz/surrogate_genz-BCS.ipynb b/examples/surrogate_genz/surrogate_genz-BCS.ipynb
@@ -58,18 +58,24 @@
  "outputs": [],
  "source": [
  "# PC parameters\n",
- "nord = 8 # Order of the PCE\n",
+ "nord = 8 # Order of the final PCE basis\n",
  "ndim = 2 # Number of dimensions\n",
  "pc_type = \"LU\" # Polynomial type\n",
  "pc_alpha = 0.0 # Free parameter > -1 for Gamma-Laguerre and Beta-Jacobi PCs\n",
  "pc_beta = 1.0 # Free parameter > -1 for Gamma-Laguerre and Beta-Jacobi PCs\n",
  "\n",
  "# BCS parameters\n",
- "sigma = 1e-8 # inital noise variance; updated in BCS\n",
- "eta = 1/np.power(10,[i for i in range(1,16)]) # threshold for stopping the algorithm\n",
- "nfolds = 5 # number of folds for eta cross-validation\n",
- "upit = 0 # number of up-iterations\n",
- "conserve = True # whether to use conservative basis growth\n",
+ "niter = 3 # Number of iterations to run, must be > 0\n",
+ "eta = 1/np.power(10,[i for i in range(1,16)]) # Threshold for stopping the algorithm\n",
+ "ntry = 5 # Number of splits for cross-validation of the retained basis terms \n",
+ "eta_folds = 5 # Number of folds to use for eta cross-validation \n",
+ "mindex_growth = 'nonconservative' # Method for basis growth; options are \"conservative,\" \"nonconservative,\" or None\n",
+ "regparams = None # Regularization weights (provide an array or value); if None, they are autopopulated\n",
+ "sigma = 1e-8 # Inital noise variance; updated in BCS\n",
+ "trval_frac = None # Fraction of total data to use in each split\n",
+ "npccut = None # Maximum number of PC coefficients to retain\n",
+ "pcf_thr = None # Minimum value (by magnitude) for PC coefficients\n",
+ "verbose = 0 # Flag for print statements\n",
  "\n",
  "# Model Choice \n",
  "model = 'genz_osc' # Choices are 'genz_osc', 'genz_exp', 'genz_cont','genz_gaus','genz_cpeak', 'genz_ppeak'"
@@ -123,10 +129,17 @@
  "outputs": [],
  "source": [
  "# PC object with the starting basis\n",
- "pc_start = uqtkpce.PCSet(\"NISPnoq\", nord-upit, ndim, pc_type, pc_alpha, pc_beta)\n",
+ "\n",
+ "# determine order of the starting basis\n",
+ "if mindex_growth == None:\n",
+ " start_ord = nord # if no basis growth, order of the final basis = order of the starting basis\n",
+ "else:\n",
+ " start_ord = nord-niter+1 # if basis growth, shrink starting basis to allow for growth niter-1 times\n",
+ " \n",
+ "pc_start = uqtkpce.PCSet(\"NISPnoq\", start_ord, ndim, pc_type, pc_alpha, pc_beta)\n",
  "\n",
  "# Determine coefficients through BCS\n",
- "c_k, pc_final = pce_tools.UQTkBCS(pc_start, y_train, x_train, sigma, eta, nfolds, upit, conserve)"
+ "pc_final, c_k = pce_tools.UQTkBCS(pc_start, x_train, y_train, niter, eta, ntry, eta_folds, mindex_growth, regparams, sigma, trval_frac, npccut, pcf_thr, verbose)"
  ]
  },
  {
@@ -153,7 +166,7 @@
  {
  "data": {
  "text/plain": [
- "1.6007266300289272e-05"
+ "0.173903885535867"
  ]
  },
  "execution_count": 5,

diff --git a/examples/surrogate_genz/surrogate_genz.ipynb b/examples/surrogate_genz/surrogate_genz.ipynb
@@ -354,16 +354,23 @@
  },
  {
  "cell_type": "code",
- "execution_count": 12,
+ "execution_count": 16,
  "id": "0d2dfeb1-e5ec-494d-a575-ee0a1bc4b71e",
  "metadata": {},
  "outputs": [],
  "source": [
- "sigma = 1e-8 # inital noise variance; updated in BCS\n",
- "eta = 1e-8 # threshold for stopping the algorithm\n",
- "upit = 0 # number of up-iterations\n",
- "conserve=True # whether to use conservative basis growth\n",
- "nfolds=5 # number of folds to use for eta cross-validation"
+ "# BCS parameters\n",
+ "niter = 2 # Number of iterations to run, must be > 0\n",
+ "eta = 1/np.power(10,[i for i in range(6,8)]) # Threshold for stopping the algorithm\n",
+ "ntry = 5 # Number of splits for cross-validation of the retained basis terms \n",
+ "eta_folds = 5 # Number of folds to use for eta cross-validation \n",
+ "mindex_growth = 'nonconservative' # Method for basis growth; options are \"conservative,\" \"nonconservative,\" or None\n",
+ "regparams = None # Regularization weights (provide an array or value); if None, they are autopopulated\n",
+ "sigma = 1e-8 # Inital noise variance; updated in BCS\n",
+ "trval_frac = None # Fraction of total data to use in each split\n",
+ "npccut = None # Maximum number of PC coefficients to retain\n",
+ "pcf_thr = None # Minimum value (by magnitude) for PC coefficients\n",
+ "verbose = 0 # Flag for print statements"
  ]
  },
  {
@@ -376,7 +383,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 13,
+ "execution_count": 17,
  "id": "6bec073b-6298-47c6-ba20-8a48cd6ac8cb",
  "metadata": {},
  "outputs": [],
@@ -389,7 +396,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 14,
+ "execution_count": 18,
  "id": "9de91054-432f-468a-8664-af6e54ddc454",
  "metadata": {},
  "outputs": [
@@ -403,21 +410,24 @@
  {
  "data": {
  "text/plain": [
- "0.0007809160070448021"
+ "0.04205840438380758"
  ]
  },
- "execution_count": 14,
+ "execution_count": 18,
  "metadata": {},
  "output_type": "execute_result"
  }
  ],
  "source": [
  "# PC object with the starting basis\n",
- "pc_start = uqtkpce.PCSet(\"NISPnoq\", nord-upit, ndim, pc_type, pc_alpha, pc_beta)\n",
+ "if mindex_growth == None:\n",
+ " start_ord = nord # if no basis growth, order of the final basis = order of the starting basis\n",
+ "else:\n",
+ " start_ord = nord-niter+1 # if basis growth, shrink starting basis to allow for growth niter-1 times\n",
+ "pc_start = uqtkpce.PCSet(\"NISPnoq\", start_ord, ndim, pc_type, pc_alpha, pc_beta)\n",
  "\n",
  "# BCS\n",
- "c_k4, pc_final = pce_tools.UQTkBCS(pc_start, y_train4, x_train4, sigma, eta, nfolds, upit, conserve)\n",
- "\n",
+ "pc_final, c_k4 = pce_tools.UQTkBCS(pc_start, x_train4, y_train4, niter, eta, ntry, eta_folds, mindex_growth, regparams, sigma, trval_frac, npccut, pcf_thr, verbose)\n",
  "# Evaluate PCE with the final basis\n",
  "x_test4=2*rng.random(n=nSam)-1\n",
  "pce_evals4=pce_tools.UQTkEvaluatePCE(pc_final,c_k4,x_test4)\n",
@@ -442,7 +452,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 15,
+ "execution_count": 19,
  "id": "07de8500-29e5-4797-98c9-a571071e7771",
  "metadata": {},
  "outputs": [
@@ -485,7 +495,7 @@
  " </tr>\n",
  " <tr>\n",
  " <th>BCS</th>\n",
- " <td>0.000781</td>\n",
+ " <td>0.042058</td>\n",
  " </tr>\n",
  " </tbody>\n",
  "</table>\n",
@@ -496,10 +506,10 @@
  "Full Galerkin 0.000020\n",
  "Sparse Galerkin 0.000019\n",
  "Regression 0.000354\n",
- "BCS 0.000781"
+ "BCS 0.042058"
  ]
  },
- "execution_count": 15,
+ "execution_count": 19,
  "metadata": {},
  "output_type": "execute_result"
  }