Commit 26eb023: Add files via upload

yjwong1999 committed Apr 5, 2023
1 parent bac37df

Showing 7 changed files with 139 additions and 14 deletions.
12 changes: 7 additions & 5 deletions README.md
@@ -42,13 +42,13 @@ python3 main_test.py --drl td3 --reward see



-# To use pretrained DRL for UAV trajectory (todo for next update)
+# To use pretrained DRL for UAV trajectory (recommended for stable convergence)
python3 main_test.py --drl td3 --reward see --trained_uav

# To set number of episodes (default is 300)
python3 main_test.py --drl td3 --reward see --ep_num 300

-# To set seeds (can be any integer) for DRL weight initialization (not recommended)
+# To set seeds for DRL weight initialization (not recommended if you use the pretrained UAV)
python3 main_test.py --drl td3 --reward see --seeds 0 # weights of both DRL are initialized with seed 0
python3 main_test.py --drl td3 --reward see --seeds 0 1 # weights of DRL 1 and DRL2 are initialized with seed 0 and 1, respectively
```
@@ -100,6 +100,8 @@ Summary
\* Remarks: </br>
Note that the performance of DRL (especially twin DRL) has high variance; you may sometimes get extremely good (or bad) performance </br>
The above benchmark results are the averaged performance of several experiments, to give a more holistic understanding of the algorithms </br>
+It is advised to use the benchmark UAV models we trained, for better convergence. </br>
+This approach is consistent with the code provided by [TDDRL](https://github.com/Brook1711/WCL-pulish-code)

## References and Acknowledgement

@@ -135,6 +137,6 @@ Main reference for TD3 implementation: </br>
- [x] Add argparse arguments to set episode number
- [x] Add argparse arguments to set seeds for the two DRLs
- [x] Add argparse arguments to load pretrained DRL for UAV trajectory
-- [ ] Add benchmark/pretrained model
-- [ ] Project naming (use ```<DRL>_<Reward>_<Num>``` instead of using datetime format)
-- [x] Remove saving "best model", there is no best model, only the latest model
+- [x] Add benchmark/pretrained model
+- [ ] Project naming (use ```<DRL>_<Reward>_<Num>``` instead of using datetime format)
+- [ ] Remove saving "best model", there is no best model, only the latest model
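The remarks above say the benchmark numbers average several experiments; the batch scripts below generate five runs per configuration for exactly that purpose. A minimal aggregation sketch, assuming every run finished and logged the same number of episodes; the `episode_lengths` helper reads the `step_num_per_episode.csv` file this commit introduces, so substitute your own per-episode metric as needed:

```python
# Sketch: average a per-episode series across the five runs that
# batch_train.sh writes under data/storage/. Episode lengths are used here
# only because the CSV is known to exist; swap in your own metric.
import csv
import numpy as np

def episode_lengths(path):
    with open(path + "/step_num_per_episode.csv", newline="") as f:
        return np.array([int(row[0]) for row in csv.reader(f)])

runs = ["data/storage/td3_see"] + [f"data/storage/td3_see_{i}" for i in range(2, 6)]
curves = np.stack([episode_lengths(p) for p in runs])  # (num_runs, ep_num)
print(curves.mean(axis=0), curves.std(axis=0))         # per-episode mean and spread
```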
52 changes: 52 additions & 0 deletions batch_eval.sh
@@ -0,0 +1,52 @@
#!/bin/bash

echo

echo ddpg_ssr
python3 load_and_plot.py --path data/storage/ddpg_ssr --ep_num 300
echo ddpg_ssr_2
python3 load_and_plot.py --path data/storage/ddpg_ssr_2 --ep_num 300
echo ddpg_ssr_3
python3 load_and_plot.py --path data/storage/ddpg_ssr_3 --ep_num 300
echo ddpg_ssr_4
python3 load_and_plot.py --path data/storage/ddpg_ssr_4 --ep_num 300
echo ddpg_ssr_5
python3 load_and_plot.py --path data/storage/ddpg_ssr_5 --ep_num 300


echo td3_ssr
python3 load_and_plot.py --path data/storage/td3_ssr --ep_num 300
echo td3_ssr_2
python3 load_and_plot.py --path data/storage/td3_ssr_2 --ep_num 300
echo td3_ssr_3
python3 load_and_plot.py --path data/storage/td3_ssr_3 --ep_num 300
echo td3_ssr_4
python3 load_and_plot.py --path data/storage/td3_ssr_4 --ep_num 300
echo td3_ssr_5
python3 load_and_plot.py --path data/storage/td3_ssr_5 --ep_num 300


echo ddpg_see
python3 load_and_plot.py --path data/storage/ddpg_see --ep_num 300
echo ddpg_see_2
python3 load_and_plot.py --path data/storage/ddpg_see_2 --ep_num 300
echo ddpg_see_3
python3 load_and_plot.py --path data/storage/ddpg_see_3 --ep_num 300
echo ddpg_see_4
python3 load_and_plot.py --path data/storage/ddpg_see_4 --ep_num 300
echo ddpg_see_5
python3 load_and_plot.py --path data/storage/ddpg_see_5 --ep_num 300


echo td3_see
python3 load_and_plot.py --path data/storage/td3_see --ep_num 300
echo td3_see_2
python3 load_and_plot.py --path data/storage/td3_see_2 --ep_num 300
echo td3_see_3
python3 load_and_plot.py --path data/storage/td3_see_3 --ep_num 300
echo td3_see_4
python3 load_and_plot.py --path data/storage/td3_see_4 --ep_num 300
echo td3_see_5
python3 load_and_plot.py --path data/storage/td3_see_5 --ep_num 300


47 changes: 47 additions & 0 deletions batch_train.sh
@@ -0,0 +1,47 @@
#!/bin/bash

echo

echo ddpg_ssr
python3 main_test.py --drl ddpg --reward ssr --ep_num 300 --trained_uav
echo ddpg_ssr_2
python3 main_test.py --drl ddpg --reward ssr --ep_num 300 --trained_uav
echo ddpg_ssr_3
python3 main_test.py --drl ddpg --reward ssr --ep_num 300 --trained_uav
echo ddpg_ssr_4
python3 main_test.py --drl ddpg --reward ssr --ep_num 300 --trained_uav
echo ddpg_ssr_5
python3 main_test.py --drl ddpg --reward ssr --ep_num 300 --trained_uav

echo td3_ssr
python3 main_test.py --drl td3 --reward ssr --ep_num 300 --trained_uav
echo td3_ssr_2
python3 main_test.py --drl td3 --reward ssr --ep_num 300 --trained_uav
echo td3_ssr_3
python3 main_test.py --drl td3 --reward ssr --ep_num 300 --trained_uav
echo td3_ssr_4
python3 main_test.py --drl td3 --reward ssr --ep_num 300 --trained_uav
echo td3_ssr_5
python3 main_test.py --drl td3 --reward ssr --ep_num 300 --trained_uav

echo ddpg_see
python3 main_test.py --drl ddpg --reward see --ep_num 300 --trained_uav
echo ddpg_see_2
python3 main_test.py --drl ddpg --reward see --ep_num 300 --trained_uav
echo ddpg_see_3
python3 main_test.py --drl ddpg --reward see --ep_num 300 --trained_uav
echo ddpg_see_4
python3 main_test.py --drl ddpg --reward see --ep_num 300 --trained_uav
echo ddpg_see_5
python3 main_test.py --drl ddpg --reward see --ep_num 300 --trained_uav

echo td3_see
python3 main_test.py --drl td3 --reward see --ep_num 300 --trained_uav
echo td3_see_2
python3 main_test.py --drl td3 --reward see --ep_num 300 --trained_uav
echo td3_see_3
python3 main_test.py --drl td3 --reward see --ep_num 300 --trained_uav
echo td3_see_4
python3 main_test.py --drl td3 --reward see --ep_num 300 --trained_uav
echo td3_see_5
python3 main_test.py --drl td3 --reward see --ep_num 300 --trained_uav
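Both scripts simply repeat each (DRL, reward) configuration five times. A hedged Python equivalent of `batch_train.sh`, using only the CLI flags documented in the README section above; run naming and output layout stay the job of `main_test.py` itself:

```python
# Sketch: Python driver equivalent to the batch_train.sh sweep above.
import subprocess

for reward in ("ssr", "see"):
    for drl in ("ddpg", "td3"):
        for run in range(1, 6):
            print(f"{drl}_{reward} run {run}")
            subprocess.run(
                ["python3", "main_test.py", "--drl", drl, "--reward", reward,
                 "--ep_num", "300", "--trained_uav"],
                check=True,  # stop the sweep if a run crashes
            )
```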
7 changes: 6 additions & 1 deletion data_manager.py
@@ -2,7 +2,7 @@
import scipy.io
import pandas as pd
import os
-import time
+import time, csv

class DataManager(object):
"""
@@ -23,6 +23,11 @@ def __init__(self, store_list = ['beamforming_matrix', 'reflecting_coefficient',
        self.init_format()

    def save_file(self, episode_cnt = 10):
+        # record step counts per episode
+        with open(self.store_path + "/step_num_per_episode.csv", "a") as f:
+            writer = csv.writer(f)
+            writer.writerow([len(list(self.simulation_result_dic.values())[0])])
+
        # when ended, auto save to .mat file
        scipy.io.savemat(self.store_path + '/simulation_result_ep_' + str(episode_cnt) + '.mat', {'result_' + str(episode_cnt):self.simulation_result_dic})
        self.simulation_result_dic = {}
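The lines added to `save_file()` append one integer per episode: the number of steps actually stored (the length of the first list in `simulation_result_dic`). A self-contained round-trip sketch of that CSV format; the `data/storage/demo` path is made up for illustration:

```python
# Round-trip sketch of the step_num_per_episode.csv format introduced above:
# one integer per row, appended once per episode.
import csv
import os

def append_step_count(store_path, steps):
    with open(store_path + "/step_num_per_episode.csv", "a", newline="") as f:
        csv.writer(f).writerow([steps])

def read_step_counts(store_path):
    with open(store_path + "/step_num_per_episode.csv", newline="") as f:
        return [int(row[0]) for row in csv.reader(f)]

os.makedirs("data/storage/demo", exist_ok=True)
append_step_count("data/storage/demo", 87)    # an episode that ended early
append_step_count("data/storage/demo", 100)   # a full-length episode
print(read_step_counts("data/storage/demo"))  # [87, 100]
```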
5 changes: 4 additions & 1 deletion env.py
@@ -60,7 +60,7 @@ class MiniSystem(object):
    define mini RIS communication system with one UAV
    and one RIS and one user, one attacker
    """
-    def __init__(self, UAV_num = 1, RIS_num = 1, user_num = 1, attacker_num = 1, fre = 28e9, RIS_ant_num = 16, UAV_ant_num=8, if_dir_link = 1, if_with_RIS = True, if_move_users = True, if_movements = True, reverse_x_y = (True, True), if_UAV_pos_state = True, reward_design = 'ssr'):
+    def __init__(self, UAV_num = 1, RIS_num = 1, user_num = 1, attacker_num = 1, fre = 28e9, RIS_ant_num = 16, UAV_ant_num=8, if_dir_link = 1, if_with_RIS = True, if_move_users = True, if_movements = True, reverse_x_y = (True, True), if_UAV_pos_state = True, reward_design = 'ssr', step_num=100):
        self.if_dir_link = if_dir_link
        self.if_with_RIS = if_with_RIS
        self.if_move_users = if_move_users
@@ -109,6 +109,9 @@ def __init__(self, UAV_num = 1, RIS_num = 1, user_num = 1, attacker_num = 1, fre

        # 1.6 reward design
        self.reward_design = reward_design # reward_design is ['ssr' or 'see']
+
+        # 1.7 step_num
+        self.step_num = step_num

        # 2.init channel
        self.H_UR = mmWave_channel(self.UAV, self.RIS, fre)
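`MiniSystem` now receives the episode length instead of it living only in the training script. This hunk does not show where `step_num` is consumed, so the following is just the usual truncation pattern such an attribute enables, with a toy class rather than the real `MiniSystem`:

```python
# Toy illustration of an episode-length cap like self.step_num. This is NOT
# MiniSystem's actual step logic, only the common truncation pattern.
class ToyEnv:
    def __init__(self, step_num=100):
        self.step_num = step_num
        self.t = 0

    def step(self):
        self.t += 1
        done = self.t >= self.step_num  # truncate once the cap is reached
        return done

env = ToyEnv(step_num=3)
print([env.step() for _ in range(3)])   # [False, False, True]
```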
17 changes: 15 additions & 2 deletions load_and_plot.py
@@ -6,6 +6,7 @@
import os
import copy
import math
+import csv

import argparse

@@ -161,6 +162,15 @@ def plot(self):

        color_list = ['b', 'g', 'c', 'k', 'm', 'r', 'y']

+
+        ###############################
+        # read step counts per episode
+        ###############################
+        step_num_per_episode = []
+        with open(self.store_path + 'step_num_per_episode.csv', newline='') as csvfile:
+            reader = csv.reader(csvfile)
+            for row in reader:
+                step_num_per_episode.append(int(row[0]))

        ###############################
        # plot reward
@@ -194,8 +204,11 @@
        sum_secrecy_rate = np.sum(sum_secrecy_rate, axis = 0)
        average_sum_secrecy_rate = []
        ssr = []
-        for i in range(0, self.ep_num * self.step_num, self.step_num):
-            ssr_one_episode = sum_secrecy_rate[i:i+self.step_num] # ssr means Sum Secrecy Rate
+        j = 0
+        for i in range(self.ep_num):
+            ssr_one_episode = sum_secrecy_rate[j:j+step_num_per_episode[i]] # ssr means Sum Secrecy Rate
+            #print(j, j+step_num_per_episode[i])
+            j = j+step_num_per_episode[i]
            ssr.append(ssr_one_episode)
            try:
                _ = sum(ssr_one_episode) / len(ssr_one_episode)
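The replaced loop assumed every episode ran exactly `step_num` steps; the new loop advances a cursor `j` by the recorded per-episode counts, so episodes that terminated early are split at the right boundaries. A toy worked example of the new slicing:

```python
# Toy numbers: 12 per-step values spread over 3 episodes of unequal length.
import numpy as np

sum_secrecy_rate = np.arange(12.0)   # flat per-step series, as in plot()
step_num_per_episode = [5, 3, 4]     # lengths read back from the CSV

episodes, j = [], 0
for n in step_num_per_episode:
    episodes.append(sum_secrecy_rate[j:j + n])
    j += n                            # advance the cursor, as in the new loop
print([float(ep.mean()) for ep in episodes])  # [2.0, 6.0, 9.5]
```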
13 changes: 8 additions & 5 deletions main_test.py
@@ -40,6 +40,10 @@
import torch

# 1 init system model
+episode_num = EPISODE_NUM # recommend to be 300
+episode_cnt = 0
+step_num = 100
+
system = MiniSystem(
    user_num=2,
    RIS_ant_num=4,
@@ -50,18 +54,17 @@
    if_movements=True,
    reverse_x_y=(False, False),
    if_UAV_pos_state = True,
-    reward_design = REWARD_DESIGN
+    reward_design = REWARD_DESIGN,
+    step_num = step_num
)

if_Theta_fixed = False
if_G_fixed = False
if_BS = False
if_robust = True
-# 2 init RL Agent
-
-episode_num = EPISODE_NUM # recommend to be 300
-episode_cnt = 0
-step_num = 100

+# 2 init RL Agent
agent_1_param_dic = {}
agent_1_param_dic["alpha"] = 0.0001
agent_1_param_dic["beta"] = 0.001
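The reordering in this hunk is a dependency fix: `step_num` must be bound before the `MiniSystem(...)` call that now consumes it, so the episode/step constants moved above the constructor and the agent-init comment moved below it. A minimal illustration with a stand-in class:

```python
# Toy class standing in for MiniSystem, showing why the constants were
# hoisted above the constructor call.
class MiniSystemToy:
    def __init__(self, step_num=100):
        self.step_num = step_num

step_num = 100                             # defined first ...
system = MiniSystemToy(step_num=step_num)  # ... then consumed here
print(system.step_num)                     # 100
```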
