# debug field import os os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" import argparse # get argument from user parser = argparse.ArgumentParser() parser.add_argument('--path', type = str, required = True, help="pretrained model weight path") args = parser.parse_args() STORE_PATH = args.path # validate the weight path if not os.path.isdir(STORE_PATH): raise NotImplementedError("The provided weight path does not exist!") # get DRL_ALGO if 'td3' in STORE_PATH: DRL_ALGO = 'td3' else: DRL_ALGO = 'ddpg' # reward design if 'see' in STORE_PATH: REWARD_DESIGN = 'see' else: REWARD_DESIGN = 'ssr' # seeds and episode number SEEDS = None # process the argument assert DRL_ALGO in ['ddpg', 'td3'], "drl must be ['ddpg', 'td3']" assert REWARD_DESIGN in ['ssr', 'see'], "reward must be ['ssr', 'see']" if SEEDS is not None: assert len(SEEDS) in [1, 2] and isinstance(SEEDS[0], int) and isinstance(SEEDS[-1], int), "seeds must be a list of 1 or 2 integer" if DRL_ALGO == 'td3': from td3 import Agent elif DRL_ALGO == 'ddpg': from ddpg import Agent import ddpg from env import MiniSystem import numpy as np import math import time import torch import shutil # 1 init system model episode_num = 1 episode_cnt = 0 step_num = 100 project_name = STORE_PATH system = MiniSystem( user_num=2, RIS_ant_num=4, UAV_ant_num=4, if_dir_link=1, if_with_RIS=True, if_move_users=True, if_movements=True, reverse_x_y=(False, False), if_UAV_pos_state = True, reward_design = REWARD_DESIGN, project_name = project_name, step_num = step_num ) if_Theta_fixed = False if_G_fixed = False if_BS = False if_robust = True # 2 init RL Agent agent_1_param_dic = {} agent_1_param_dic["alpha"] = 0.0001 agent_1_param_dic["beta"] = 0.001 agent_1_param_dic["input_dims"] = system.get_system_state_dim() agent_1_param_dic["tau"] = 0.001 agent_1_param_dic["batch_size"] = 64 agent_1_param_dic["n_actions"] = system.get_system_action_dim() - 2 agent_1_param_dic["action_noise_factor"] = 0.1 agent_1_param_dic["memory_max_size"] = int(5/5 * episode_num * step_num) #/2 agent_1_param_dic["agent_name"] = "G_and_Phi" agent_1_param_dic["layer1_size"] = 800 agent_1_param_dic["layer2_size"] = 600 agent_1_param_dic["layer3_size"] = 512 agent_1_param_dic["layer4_size"] = 256 agent_2_param_dic = {} agent_2_param_dic["alpha"] = 0.0001 agent_2_param_dic["beta"] = 0.001 agent_2_param_dic["input_dims"] = 3 agent_2_param_dic["tau"] = 0.001 agent_2_param_dic["batch_size"] = 64 agent_2_param_dic["n_actions"] = 2 agent_2_param_dic["action_noise_factor"] = 0.5 agent_2_param_dic["memory_max_size"] = int(5/5 * episode_num * step_num) #/2 agent_2_param_dic["agent_name"] = "UAV" agent_2_param_dic["layer1_size"] = 400 agent_2_param_dic["layer2_size"] = 300 agent_2_param_dic["layer3_size"] = 256 agent_2_param_dic["layer4_size"] = 128 if SEEDS is not None: torch.manual_seed(SEEDS[0]) # 1 torch.cuda.manual_seed_all(SEEDS[0]) # 1 agent_1 = Agent( alpha = agent_1_param_dic["alpha"], beta = agent_1_param_dic["beta"], input_dims = [agent_1_param_dic["input_dims"]], tau = agent_1_param_dic["tau"], env = system, batch_size = agent_1_param_dic["batch_size"], layer1_size=agent_1_param_dic["layer1_size"], layer2_size=agent_1_param_dic["layer2_size"], layer3_size=agent_1_param_dic["layer3_size"], layer4_size=agent_1_param_dic["layer4_size"], n_actions = agent_1_param_dic["n_actions"], max_size = agent_1_param_dic["memory_max_size"], agent_name= agent_1_param_dic["agent_name"] ) if SEEDS is not None: torch.manual_seed(SEEDS[-1]) # 2 torch.cuda.manual_seed_all(SEEDS[-1]) # 2 agent_2 = Agent( alpha = agent_2_param_dic["alpha"], beta = agent_2_param_dic["beta"], input_dims = [agent_2_param_dic["input_dims"]], tau = agent_2_param_dic["tau"], env = system, batch_size = agent_2_param_dic["batch_size"], layer1_size=agent_2_param_dic["layer1_size"], layer2_size=agent_2_param_dic["layer2_size"], layer3_size=agent_2_param_dic["layer3_size"], layer4_size=agent_2_param_dic["layer4_size"], n_actions = agent_2_param_dic["n_actions"], max_size = agent_2_param_dic["memory_max_size"], agent_name= agent_2_param_dic["agent_name"] ) if DRL_ALGO == 'td3': agent_1.load_models( load_file_actor = STORE_PATH + '/Actor_G_and_Phi_TD3', load_file_critic_1 = STORE_PATH + '/Critic_1_G_and_Phi_TD3', load_file_critic_2 = STORE_PATH + '/Critic_2_G_and_Phi_TD3' ) agent_2.load_models( load_file_actor = STORE_PATH + '/Actor_UAV_TD3', load_file_critic_1 = STORE_PATH + '/Critic_1_UAV_TD3', load_file_critic_2 = STORE_PATH + '/Critic_2_UAV_TD3' ) elif DRL_ALGO == 'ddpg': agent_1.load_models( load_file_actor = STORE_PATH + '/Actor_G_and_Phi_ddpg', load_file_critic = STORE_PATH + '/Critic_G_and_Phi_ddpg' ) agent_2.load_models( load_file_actor = STORE_PATH + '/Actor_UAV_ddpg', load_file_critic = STORE_PATH + '/Critic_UAV_ddpg' ) meta_dic = {} print("***********************system information******************************") print("folder_name: "+str(system.data_manager.store_path)) meta_dic['folder_name'] = system.data_manager.store_path print("user_num: "+str(system.user_num)) meta_dic['user_num'] = system.user_num print("if_dir: "+str(system.if_dir_link)) meta_dic['if_dir_link'] = system.if_dir_link print("if_with_RIS: "+str(system.if_with_RIS)) meta_dic['if_with_RIS'] = system.if_with_RIS print("if_user_m: "+str(system.if_move_users)) meta_dic['if_move_users'] = system.if_move_users print("RIS_ant_num: "+str(system.RIS.ant_num)) meta_dic['system_RIS_ant_num'] = system.RIS.ant_num print("UAV_ant_num: "+str(system.UAV.ant_num)) meta_dic['system_UAV_ant_num'] = system.UAV.ant_num print("if_movements: "+str(system.if_movements)) meta_dic['system_if_movements'] = system.if_movements print("reverse_x_y: "+str(system.reverse_x_y)) meta_dic['system_reverse_x_y'] = system.reverse_x_y print("if_UAV_pos_state:"+str(system.if_UAV_pos_state)) meta_dic['if_UAV_pos_state'] = system.if_UAV_pos_state print("ep_num: "+str(episode_num)) meta_dic['episode_num'] = episode_num print("step_num: "+str(step_num)) meta_dic['step_num'] = step_num print("***********************agent_1 information******************************") tplt = "{0:{2}^20}\t{1:{2}^20}" for i in agent_1_param_dic: parm = agent_1_param_dic[i] print(tplt.format(i, parm, chr(12288))) meta_dic["agent_1"] = agent_1_param_dic print("***********************agent_2 information******************************") for i in agent_2_param_dic: parm = agent_2_param_dic[i] print(tplt.format(i, parm, chr(12288))) meta_dic["agent_2"] = agent_2_param_dic system.data_manager.save_meta_data(meta_dic) print("***********************traning information******************************") try: while episode_cnt < episode_num: # 1 reset the whole system system.reset() step_cnt = 0 score_per_ep = 0 # 2 get the initial state if if_robust: tmp = system.observe() #z = np.random.multivariate_normal(np.zeros(2), 0.5*np.eye(2), size=len(tmp)).view(np.complex128) z = np.random.normal(size=len(tmp)) observersion_1 = list( np.array(tmp) + 0.6 *1e-7* z ) else: observersion_1 = system.observe() observersion_2 = list(system.UAV.coordinate) while step_cnt < step_num: # 1 count num of step in one episode step_cnt += 1 # judge if pause the whole system if not system.render_obj.pause: # 2 choose action acoording to current state action_1 = agent_1.choose_action(observersion_1, greedy=agent_1_param_dic["action_noise_factor"] * math.pow((1-episode_cnt / episode_num), 2)) action_2 = agent_2.choose_action(observersion_2, greedy=agent_2_param_dic["action_noise_factor"]* math.pow((1-episode_cnt / episode_num), 2)) if if_BS: action_2[0]=0 action_2[1]=0 if if_Theta_fixed: action_1[0+2 * system.UAV.ant_num * system.user_num:] = len(action_1[0+2 * system.UAV.ant_num * system.user_num:])*[0] if if_G_fixed: action_1[0:0+2 * system.UAV.ant_num * system.user_num]=np.array([-0.0313, -0.9838, 0.3210, 1.0, -0.9786, -0.1448, 0.3518, 0.5813, -1.0, -0.2803, -0.4616, -0.6352, -0.1449, 0.7040, 0.4090, -0.8521]) * math.pow(episode_cnt / episode_num, 2) * 0.7 #action_1[0:0+2 * system.UAV.ant_num * system.user_num]=len(action_1[0:0+2 * system.UAV.ant_num * system.user_num])*[0.5] # 3 get newstate, reward if system.if_with_RIS: new_state_1, reward, done, info = system.step( action_0=action_2[0], action_1=action_2[1], G=action_1[0:0+2 * system.UAV.ant_num * system.user_num], Phi=action_1[0+2 * system.UAV.ant_num * system.user_num:], set_pos_x=action_2[0], set_pos_y=action_2[1] ) new_state_2 = list(system.UAV.coordinate) else: new_state_1, reward, done, info = system.step( action_0=action_2[0], action_1=action_2[1], G=action_1[0:0+2 * system.UAV.ant_num * system.user_num], set_pos_x=action_2[0], set_pos_y=action_2[1] ) new_state_2 = list(system.UAV.coordinate) score_per_ep += reward # render system.render_obj.render(0.001) # no rendering for faster observersion_1 = new_state_1 observersion_2 = new_state_2 if done == True: break else: system.render_obj.render_pause() # no rendering for faster time.sleep(0.001) #time.sleep(1) system.reset() print("ep_num: "+str(episode_cnt)+" ep_score: "+str(score_per_ep)) episode_cnt +=1 except KeyboardInterrupt: raise KeyboardInterrupt finally: shutil.rmtree('data/storage/data')