Can someone help me figure out how to change the MLP structure here to a KAN structure? #93

Closed
jackzhangqing opened this issue May 6, 2024 · 2 comments

Comments

@jackzhangqing

class PhysicsInformedNN:

    # Initialize the class
    def __init__(self, x, y, u0_real, u0_imag, u0_real_xx, u0_real_yy, u0_imag_xx, u0_imag_yy, m, m0, eta, delta, layers, omega):

        X = np.concatenate([x, y], 1)

        self.lb = X.min(0)
        self.ub = X.max(0)

        self.X = X

        self.x = X[:,0:1]
        self.y = X[:,1:2]
        self.u0_real = u0_real
        self.u0_imag = u0_imag

        self.u0_real_xx = u0_real_xx
        self.u0_imag_xx = u0_imag_xx

        self.u0_real_yy = u0_real_yy
        self.u0_imag_yy = u0_imag_yy

        self.m = m
        self.m0 = m0
        self.eta = eta
        self.delta = delta

        self.layers = layers

        self.omega = omega

        # Initialize NN
        self.weights, self.biases = self.initialize_NN(layers)

        # tf placeholders
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                     log_device_placement=True))

        self.x_tf = tf.placeholder(tf.float32, shape=[None, self.x.shape[1]])
        self.y_tf = tf.placeholder(tf.float32, shape=[None, self.y.shape[1]])

        self.du_real_pred, self.du_imag_pred, self.f_real_pred, self.f_imag_pred, self.fq_real_pred, self.fq_imag_pred = self.net_NS(self.x_tf, self.y_tf)

        # loss function we define
        self.loss = tf.reduce_sum(tf.square(self.f_real_pred)) + tf.reduce_sum(tf.square(self.f_imag_pred)) + \
                    tf.reduce_sum(tf.square(self.fq_real_pred)) + tf.reduce_sum(tf.square(self.fq_imag_pred))

        # optimizer used by default (in original paper)
        self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.loss,
                                                                method='L-BFGS-B',
                                                                options={'maxiter': 50000,
                                                                         'maxfun': 50000,
                                                                         'maxcor': 50,
                                                                         'maxls': 50,
                                                                         'ftol': 1.0 * np.finfo(float).eps})

        self.optimizer_Adam = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999,
                                                     epsilon=1e-08, use_locking=False, name='Adam')
        self.train_op_Adam = self.optimizer_Adam.minimize(self.loss)

        init = tf.global_variables_initializer()
        self.sess.run(init)



    def initialize_NN(self, layers):
        weights = []
        biases = []
        num_layers = len(layers)
        for l in range(0, num_layers-1):
            W = self.xavier_init(size=[layers[l], layers[l+1]])
            b = tf.Variable(tf.zeros([1, layers[l+1]], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)
        return weights, biases

    def xavier_init(self, size):
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = np.sqrt(2/(in_dim + out_dim))
        return tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)


    def neural_net(self, X, weights, biases):
        num_layers = len(weights) + 1
        # scale the inputs to [-1, 1] using the training-data bounds
        H = 2.0*(X - self.lb)/(self.ub - self.lb) - 1.0
        for l in range(0, num_layers-2):
            W = weights[l]
            b = biases[l]
            # H = tf.tanh(tf.add(tf.matmul(H, W), b))
            H = tf.atan(tf.add(tf.matmul(H, W), b))
        W = weights[-1]
        b = biases[-1]
        Y = tf.add(tf.matmul(H, W), b)
        return Y
    def net_NS(self, x, y):
        # outputs: scattered wavefield du_real, du_imag and auxiliary field q_real, q_imag
        # loss function: L du + omega^2 dm u0
        omega = self.omega
        m = self.m
        m0 = self.m0
        eta = self.eta
        delta = self.delta
        u0_real = self.u0_real
        u0_imag = self.u0_imag

        u0_real_xx = self.u0_real_xx
        u0_imag_xx = self.u0_imag_xx

        u0_real_yy = self.u0_real_yy
        u0_imag_yy = self.u0_imag_yy

        p_and_q = self.neural_net(tf.concat([x, y], 1), self.weights, self.biases)

        du_real = p_and_q[:,0:1]
        du_imag = p_and_q[:,1:2]
        q_real = p_and_q[:,2:3]
        q_imag = p_and_q[:,3:4]

        du_real_x = tf.gradients(du_real, x)[0]
        du_real_y = tf.gradients(du_real, y)[0]
        du_real_xx = tf.gradients(du_real_x, x)[0]
        du_real_yy = tf.gradients(du_real_y, y)[0]

        du_imag_x = tf.gradients(du_imag, x)[0]
        du_imag_y = tf.gradients(du_imag, y)[0]
        du_imag_xx = tf.gradients(du_imag_x, x)[0]
        du_imag_yy = tf.gradients(du_imag_y, y)[0]

        q_real_x = tf.gradients(q_real, x)[0]
        q_real_y = tf.gradients(q_real, y)[0]
        q_real_xx = tf.gradients(q_real_x, x)[0]
        q_real_yy = tf.gradients(q_real_y, y)[0]

        q_imag_x = tf.gradients(q_imag, x)[0]
        q_imag_y = tf.gradients(q_imag, y)[0]
        q_imag_xx = tf.gradients(q_imag_x, x)[0]
        q_imag_yy = tf.gradients(q_imag_y, y)[0]

        # PDE residuals (real and imaginary parts) for du and q
        f_real = omega*omega*m*du_real + du_real_xx + q_real_xx + du_real_yy/(1+2*delta) + omega*omega*(m-m0)*u0_real + (1/(1+2*delta)-1)*u0_real_yy
        f_imag = omega*omega*m*du_imag + du_imag_xx + q_imag_xx + du_imag_yy/(1+2*delta) + omega*omega*(m-m0)*u0_imag + (1/(1+2*delta)-1)*u0_imag_yy

        fq_real = omega*omega*m*q_real + 2*eta*(du_real_xx + q_real_xx) + 2*eta*u0_real_xx
        fq_imag = omega*omega*m*q_imag + 2*eta*(du_imag_xx + q_imag_xx) + 2*eta*u0_imag_xx

        return du_real, du_imag, f_real, f_imag, fq_real, fq_imag

    def callback(self, loss):
        print('Loss: %.3e' % (loss))
        misfit1.append(loss)
    
    def train(self, nIter):
        tf_dict = {self.x_tf: self.x, self.y_tf: self.y}

        start_time = time.time()
        for it in range(nIter):
            self.sess.run(self.train_op_Adam, tf_dict)
            loss_value = self.sess.run(self.loss, tf_dict)
            misfit.append(loss_value)
            # Print
            if it % 10 == 0:
                elapsed = time.time() - start_time
                loss_value = self.sess.run(self.loss, tf_dict)
                # misfit.append(loss_value)
                print('It: %d, Loss: %.3e, Time: %.2f' %
                      (it, loss_value, elapsed))
                start_time = time.time()

        self.optimizer.minimize(self.sess,
                                feed_dict=tf_dict,
                                fetches=[self.loss],
                                loss_callback=self.callback)

    def predict(self, x_star, y_star):

        tf_dict = {self.x_tf: x_star, self.y_tf: y_star}

        du_real_star = self.sess.run(self.du_real_pred, tf_dict)
        du_imag_star = self.sess.run(self.du_imag_pred, tf_dict)

        return du_real_star, du_imag_star
KindXiaoming (Owner) commented May 6, 2024

Hi, here is a minimal example of how you can use KAN to solve a PDE. Hope this helps!
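Roughly, the setup looks like this. This is only a minimal sketch (not the exact tutorial code): it relies on the pykan pieces that also appear below in this thread (KAN(width=..., grid=..., k=...), the bundled LBFGS, update_grid_from_samples), while the problem itself (a 2D Poisson equation on [-1, 1]^2), the network width, the collocation points, and the loss weighting are illustrative placeholders.

from kan import KAN
from kan.LBFGS import LBFGS
import torch
from torch import autograd
from tqdm import tqdm

# a small KAN: 2 inputs (x, y) -> 1 output u; width/grid/k values here are illustrative
model = KAN(width=[2, 2, 1], grid=5, k=3, grid_eps=1.0, noise_scale_base=0.25)

def batch_jacobian(func, x, create_graph=False):
    # Jacobian of func over a batch x of shape (Batch, in_dim) -> (Batch, out_dim, in_dim)
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1, 0, 2)

# manufactured solution of u_xx + u_yy = f on [-1, 1]^2 and the matching source term
sol_fun = lambda x: torch.sin(torch.pi * x[:, [0]]) * torch.sin(torch.pi * x[:, [1]])
source_fun = lambda x: -2 * torch.pi**2 * sol_fun(x)

# interior collocation points and boundary points
x_i = torch.rand(1000, 2) * 2 - 1
t = torch.linspace(-1, 1, 101)[:, None]
ones = torch.ones_like(t)
x_b = torch.cat([torch.cat([t, -ones], 1), torch.cat([t, ones], 1),
                 torch.cat([-ones, t], 1), torch.cat([ones, t], 1)], 0)

optimizer = LBFGS(model.parameters(), lr=1, history_size=10, line_search_fn="strong_wolfe",
                  tolerance_grad=1e-32, tolerance_change=1e-32, tolerance_ys=1e-32)

pbar = tqdm(range(20))
for it in pbar:
    def closure():
        optimizer.zero_grad()
        # second derivatives of the KAN output via two nested batch Jacobians
        grad_fun = lambda xx: batch_jacobian(model, xx, create_graph=True)[:, 0, :]
        hess = batch_jacobian(grad_fun, x_i, create_graph=True)   # (N, 2, 2)
        lap = hess[:, 0, [0]] + hess[:, 1, [1]]                   # u_xx + u_yy
        pde_loss = torch.mean((lap - source_fun(x_i)) ** 2)
        bc_loss = torch.mean((model(x_b) - sol_fun(x_b)) ** 2)    # Dirichlet boundary loss
        loss = pde_loss + 100.0 * bc_loss
        loss.backward()
        return loss

    if it % 5 == 0:
        model.update_grid_from_samples(x_i)   # adapt the spline grids to the current inputs
    loss = optimizer.step(closure)
    pbar.set_description("loss: %.2e" % loss.item())

The key pattern is computing the PDE derivatives with nested batch Jacobians of the KAN output and feeding the combined PDE-plus-boundary loss to LBFGS through a closure.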

jackzhangqing (Author) commented:

> Hi, here is a minimal example of how you can use KAN to solve a PDE. Hope this helps!

Thank you for your assistance. Following your example, I replaced the MLP in the PINN with a KAN to solve the PDE. Below are the modified code and the error messages. I've been working on this for quite some time and am unsure how to resolve it.

Load the Helm1 data

data = scipy.io.loadmat('./data/Homo_4Hz_singlesource_ps.mat')
'''
A 40401x1 646416 double complex
B 40401x1 646416 double complex
C 40401x1 646416 double complex
Ps 40401x1 646416 double complex

U_imag 40401x1 323208 double
U_real 40401x1 323208 double

m 40401x1 323208 double
x_star 40401x1 323208 double
z_star 40401x1 323208 double
'''
x = data['x_star']
z = data['z_star']

ps = data['Ps']
m = data['m']
A = data['A']
B = data['B']
C = data['C']

N = x.shape[0]
N_train = N
idx = np.random.choice(N, N_train, replace=False)
x_train = x[idx, :]
z_train = z[idx, :]
x = torch.tensor(x_train, dtype=torch.float32)
z = torch.tensor(z_train, dtype=torch.float32)

# Concatenate x and z along the second dimension to form a 40401 x 2 tensor

x_i = torch.cat((x, z), dim=1)

ps_train = ps[idx, :]
m_train = m[idx, :]
A_train = A[idx, :]
B_train = B[idx, :]
C_train = C[idx, :]

Define the KAN model

Case 1: 2 inputs, 2 outputs

MLP: [2, 40, 40, 40, 40, 40, 40, 40, 40, 2]

model = KAN(width=[2, 10, 10, 10, 10, 2], grid=5, k=3, grid_eps=1.0, noise_scale_base=0.25)

A helper that computes the Jacobian for a batch of samples.

def batch_jacobian(func, x, create_graph=False):
    # x in shape (Batch, Length)
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1, 0, 2)
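For the KAN above (2 inputs, 2 outputs) and a batch of N sample points, batch_jacobian(model, x) returns a tensor of shape (N, 2, 2): the per-sample Jacobian, with the output dimension first and the input dimension last.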

Compute gradients

def fwd_gradients(Y, x):
    dummy = torch.ones_like(Y)
    G = torch.autograd.grad(Y, x, grad_outputs=dummy, retain_graph=True)[0]
    Y_x = torch.autograd.grad(G, dummy, retain_graph=True)[0]
    return Y_x

steps = 25000

loss_values = []

def train():
    optimizer = LBFGS(model.parameters(), lr=1, history_size=10, line_search_fn="strong_wolfe", tolerance_grad=1e-32, tolerance_change=1e-32, tolerance_ys=1e-32)

    pbar = tqdm(range(steps), desc='description')

    global loss

    for it in pbar:
        def closure():

            optimizer.zero_grad()

            # pde loss
            ureal_and_uimag = model(torch.cat((x, z), dim=1))
            u_real = ureal_and_uimag[:, 0:1]
            u_imag = ureal_and_uimag[:, 1:2]
            u = torch.complex(u_real, u_imag)

            dudx = fwd_gradients(u, x)
            dudz = fwd_gradients(u, z)
            dudxx = fwd_gradients((A_train * dudx), x)
            dudzz = fwd_gradients((B_train * dudz), z)

            f_loss = C_train*omega*omega*m_train*u + dudxx + dudzz - ps_train
            loss = torch.sum(torch.square(torch.abs(f_loss)))

            loss.backward()

            loss_values.append(loss.item())

            return loss

        if it % 10 == 0 and it < 30000:
            model.update_grid_from_samples(torch.cat((x, z), dim=1))

        optimizer.step(closure)

        if it % 10 == 0:
            pbar.set_description("loss: %.2e" % (loss.cpu().detach().numpy()))

scipy.io.savemat('loss_KAN.mat', {'loss': loss_values})

torch.save(model.state_dict(), 'trained_KAN_Helm1.pt')

train()


description: 0%| | 0/25000 [00:12<?, ?it/s]
Traceback (most recent call last):
File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 127, in <module>
train()
File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 117, in train
optimizer.step(closure)
File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\optim\optimizer.py", line 140, in wrapper
out = func(*args, **kwargs)
File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "D:\0_Suda_Py\pykan-master\kan\LBFGS.py", line 319, in step
orig_loss = closure()
File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 100, in closure
dudx = fwd_gradients(u, x)
File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 71, in fwd_gradients
G = torch.autograd.grad(Y, x, grad_outputs=dummy, retain_graph=True)[0]
File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\__init__.py", line 302, in grad
allow_unused, accumulate_grad=False) # Calls into the C++ engine to run the backward pass
RuntimeError: One of the differentiated Tensors does not require grad
