center X, add uniform noise

xunzheng · Jul 5, 2020 · e6c53d3 · e6c53d3
1 parent c3636d4
commit e6c53d3
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 12 deletions.
diff --git a/notears/linear.py b/notears/linear.py
@@ -64,6 +64,8 @@ def _func(w):
  n, d = X.shape
  w_est, rho, alpha, h = np.zeros(2 * d * d), 1.0, 0.0, np.inf # double w_est into (w_pos, w_neg)
  bnds = [(0, 0) if i == j else (0, None) for _ in range(2) for i in range(d) for j in range(d)]
+ if loss_type == 'l2':
+ X = X - np.mean(X, axis=0, keepdims=True)
  for _ in range(max_iter):
  w_new, h_new = None, None
  while rho < rho_max:
@@ -84,20 +86,20 @@ def _func(w):
 
 
 if __name__ == '__main__':
- import notears.utils as ut
- ut.set_random_seed(1)
+ from notears import utils
+ utils.set_random_seed(1)
 
  n, d, s0, graph_type, sem_type = 100, 20, 20, 'ER', 'gauss'
- B_true = ut.simulate_dag(d, s0, graph_type)
- W_true = ut.simulate_parameter(B_true)
+ B_true = utils.simulate_dag(d, s0, graph_type)
+ W_true = utils.simulate_parameter(B_true)
  np.savetxt('W_true.csv', W_true, delimiter=',')
 
- X = ut.simulate_linear_sem(W_true, n, sem_type)
+ X = utils.simulate_linear_sem(W_true, n, sem_type)
  np.savetxt('X.csv', X, delimiter=',')
 
  W_est = notears_linear(X, lambda1=0.1, loss_type='l2')
- assert ut.is_dag(W_est)
+ assert utils.is_dag(W_est)
  np.savetxt('W_est.csv', W_est, delimiter=',')
- acc = ut.count_accuracy(B_true, W_est != 0)
+ acc = utils.count_accuracy(B_true, W_est != 0)
  print(acc)
 
diff --git a/notears/utils.py b/notears/utils.py
@@ -78,10 +78,12 @@ def simulate_parameter(B, w_ranges=((-2.0, -0.5), (0.5, 2.0))):
 def simulate_linear_sem(W, n, sem_type, noise_scale=None):
  """Simulate samples from linear SEM with specified type of noise.
 
+ For uniform, noise z ~ uniform(-a, a), where a = noise_scale.
+
  Args:
  W (np.ndarray): [d, d] weighted adj matrix of DAG
  n (int): num of samples, n=inf mimics population risk
- sem_type (str): gauss, exp, gumbel, logistic, poisson
+ sem_type (str): gauss, exp, gumbel, uniform, logistic, poisson
  noise_scale (np.ndarray): scale parameter of additive noise, default all ones
 
  Returns:
@@ -98,6 +100,9 @@ def _simulate_single_equation(X, w, scale):
  elif sem_type == 'gumbel':
  z = np.random.gumbel(scale=scale, size=n)
  x = X @ w + z
+ elif sem_type == 'uniform':
+ z = np.random.uniform(low=-scale, high=scale, size=n)
+ x = X @ w + z
  elif sem_type == 'logistic':
  x = np.random.binomial(1, sigmoid(X @ w)) * 1.0
  elif sem_type == 'poisson':
@@ -107,18 +112,28 @@ def _simulate_single_equation(X, w, scale):
  return x
 
  d = W.shape[0]
- scale_vec = noise_scale if noise_scale else np.ones(d)
- if np.isinf(n):
+ if noise_scale is None:
+ scale_vec = np.ones(d)
+ elif np.isscalar(noise_scale):
+ scale_vec = noise_scale * np.ones(d)
+ else:
+ if len(noise_scale) != d:
+ raise ValueError('noise scale must be a scalar or has length d')
+ scale_vec = noise_scale
+ if not is_dag(W):
+ raise ValueError('W must be a DAG')
+ if np.isinf(n): # population risk for linear gauss SEM
  if sem_type == 'gauss':
  # make 1/d X'X = true cov
- X = np.sqrt(d) * np.diag(scale_vec) @ np.linalg.pinv(np.eye(d) - W)
+ X = np.sqrt(d) * np.diag(scale_vec) @ np.linalg.inv(np.eye(d) - W)
  return X
  else:
  raise ValueError('population risk not available')
- X = np.zeros([n, d])
+ # empirical risk
  G = ig.Graph.Weighted_Adjacency(W.tolist())
  ordered_vertices = G.topological_sorting()
  assert len(ordered_vertices) == d
+ X = np.zeros([n, d])
  for j in ordered_vertices:
  parents = G.neighbors(j, mode=ig.IN)
  X[:, j] = _simulate_single_equation(X[:, parents], W[parents, j], scale_vec[j])