Change seed for MLP

Add RBF stuffs
mpiffari · Aug 13, 2019 · 406ea32 · 406ea32
1 parent 6e18f4f
commit 406ea32
Show file tree

Hide file tree

Showing 9 changed files with 464 additions and 6 deletions.
diff --git a/NN/ML_Perceptrons.m b/NN/ML_Perceptrons.m
@@ -2,6 +2,9 @@
 clear variables
 clc
 
+time = clock;
+seed = time(6);
+rng(seed);
 %% XOR
 bias = -1; % Fixed value setted permanently to -1
 dataset = [0,0,bias; 0,1,bias; 1,0,bias; 1,1,bias]; % Input dataset (u)

diff --git a/NN/Shuffle.m b/NN/Shuffle.m
@@ -1,7 +1,9 @@
  function mat_shuffled = Shuffle(mat, row, column)
- time = clock;
- seed = time(6);
- rng(seed);
+% Initi seed only one time to recreate always the same error and figure out
+% the problem
+% time = clock;
+% seed = time(6);
+% rng(seed);
  indexes = randi(row,row,1)';
  mat_shuffled = zeros(row,(column+1));
  for i = 1:row

diff --git a/RL_Control/CarOnTheMountain_PhysicLaw.m b/RL_Control/CarOnTheMountain_PhysicLaw.m
@@ -6,14 +6,15 @@
 numberOfState = 2; % Position and velocity
 stateSpace = [0;0]; % Column vector
 
-maximumAcceleration = 10;
+maximumAcceleration = 1;
 stepDiscreteAcceleration = 0.01;
 actionSpace = -maximumAcceleration : stepDiscreteAcceleration : maximumAcceleration;
 
+% System physic parameters
 h = 1; % Max height of the mountain [m]
 L = 4; % Length of the valley
 m = 1; % [kg]
-noise = 0;
+g = 9.80665; % Gravity acceleration [m/s^2]
 
 %Time
 t_i = 0;
@@ -23,7 +24,7 @@
 now = 1;
 after = 2;
 position = [0, L/2];
-velocity = [0 , 0];
+velocity = [0 , 0.9];
 
 epsilon = 0.3;
 

diff --git a/RL_Control/CarOnTheMountain_RBF.asv b/RL_Control/CarOnTheMountain_RBF.asv
@@ -0,0 +1,203 @@
+close all
+clear variables
+clc
+
+time = clock;
+seed = time(6);
+rng(seed);
+
+%% Notes
+% No need to use k-means to determine the center mu of each RBF
+
+%% Variables and parameters
+% System physic parameters
+h = 1; % Max height of the mountain [m]
+L = 100; % Length of the valley
+m = 1; % [kg]
+g = 9.80665; % Gravity acceleration [m/s^2]
+maximumVelocity = sqrt(2 * g * h); % [m/s]
+maximumAcceleration = 4; % [m/s]
+% stepDiscreteAcceleration = 0.01;
+
+% State space
+numberOfState = 2; % Position and velocity
+stateSpace = [0;0]; % Column vector
+% actionSpace = -maximumAcceleration : stepDiscreteAcceleration :
+% maximumAcceleration; --> continuos action space
+actionSpace = [- maximumAcceleration, maximumAcceleration];
+
+% RBF parameters
+discret_position = 5;
+discret_velocity = 5;
+
+% SARSA parameters
+epsilon_0 = 0.9;
+gamma = 1;
+learning_rate = 0.4; % Alpha
+isTerminalState = 0;
+
+%Time
+number_of_episode = 100;
+t_i = 0; % [s]
+dt = 0.05; % [s]
+timeout = 10; % [s] = final execution time
+
+now = 1;
+after = 2;
+position = [L/2, 0];
+velocity = [0, 0];
+reward = [0, 0];
+
+fig0 = figure;
+
+%% Center and variance computation
+row = 1;
+position_step = L / (discret_position - 1);
+velocity_step = (maximumVelocity * 2) / (discret_velocity - 1);
+number_of_centrum = discret_position * discret_velocity;
+sigma_position = position_step / sqrt(2 * number_of_centrum);
+sigma_velocity = velocity_step / sqrt(2 * number_of_centrum);
+
+mu = zeros(number_of_centrum, 2);
+sigma = zeros(number_of_centrum, 2); % [sigma_position, sigma_velocity]
+for i= 0 : position_step : L
+ for j = - maximumVelocity : velocity_step : maximumVelocity
+ mu(row, :) = [i,j];
+ row = row + 1;
+ end
+end
+
+for i = 1 : number_of_centrum
+ sigma(i,:) = [sigma_position, sigma_velocity];
+end
+scatter(mu(:,1),mu(:,2), 'filled');
+
+%% Design of the net
+number_of_input = 2; % Number of neurons in the input layer (u_i)
+number_of_hidden_layer = 1; % Number of hidden layers
+number_of_hidden_neuron = number_of_centrum; % Number of neurons for each hidden layer x_j
+number_of_output = 1; % Number of neurons in the output layer (v_k)
+
+% Random initialization of the weights with value [-1,+1]
+% Row: hidden neuron - bias neuron, Column: input neuron
+w_in_hid = ones(number_of_hidden_neuron, number_of_input); % Weigths between input and hidden layer: ALWAYS FIXED TO 1
+% Row: output neuron, Column: hidden neuron
+w_hid_out_positive = -1 + (1+1)*rand(number_of_output, number_of_hidden_neuron + 1);
+w_hid_out_negative = -1 + (1+1)*rand(number_of_output, number_of_hidden_neuron + 1);
+
+phi_positive = rand(number_of_output, number_of_hidden_neuron + 1);
+phi_negative = rand(number_of_output, number_of_hidden_neuron + 1);
+
+%% Algorithm
+
+for episode = 1:number_of_episode
+ stateSpace(:,1) = [position(now); velocity(now)]; % Initialise state
+ sign_of_acc = sign(-1 + (1+1)*rand()); % Random initialization of the very first action
+ a_t = sign_of_acc * maximumAcceleration;
+ t = t_i;
+ isTerminal = 0;
+ while isTerminalState == 0 || t < timeout
+ %%%%%%%%%%%%%% Take action %%%%%%%%%%%%%%%%%%%%
+ x = position(now);
+ params = Parameters(x, h, m, L);
+ A = params(1);
+ B = params(2);
+ C = params(3);
+ D = params(4);
+
+ velocity(after) = velocity(now) + dt * (1/A)*(m * (a_t/D) - C - B * (velocity(now))^2);
+ position(after) = x + dt * velocity(now);
+ reward(after) = Reward(position(now), L);
+
+ if position(after) < 0
+ position(after) = 0;
+ velocity(after) = 0;
+ elseif position(after) >= L
+ disp('####### YEEEE ##########')
+ % Q value calculation: Q = output of the RBF net = linear
+ % combination of weigth and phi function
+ % Phi function calculation: phi_positive = phi_negative
+ phi_positive = Phi_calcultation(stateSpace, mu, sigma);
+ phi_negative = phi_positive;
+
+ %Update weigth
+ if a_t > 0
+ % Update positive weigths
+ w_hid_out_positive = w_hid_out_positive + learning_rate
+ else
+ % Update negative weigths
+ w_hid_out_negative = w_hid_out_negative + learning_rate
+ end
+
+ isTerminal = 1;
+ break
+ else
+
+
+ t = t + dt;
+ end
+
+end
+
+%% Old alg
+for t = t_i : dt : t_f
+
+ % Take the the action : greedy way
+
+ % phi calculation based on actual position
+
+
+ Q_minus = 0 * w_hid_out_negative;
+ Q_plus = 0 * w_hid_out_positive;
+
+ if Q_minus > Q_plus
+ a_t = - maximumAcceleration;
+ else
+ a_t = maximumAcceleration;
+ end
+
+ action = rand();
+ epsilon = epsilon_0 / t;
+ if action < epsilons
+ sign_of_acc = sign(-1 + (1+1)*rand());
+ a_t = sign_of_acc * maximumAcceleration;
+ end
+
+ %% Calculation with physic equation
+
+
+ %reward calc
+ % if s_(t+1) is terminal
+
+ if position(after) < 0
+ position(after) = 0;
+ velocity(after) = 0;
+ elseif position(after) >= L
+ disp('####### YEEE ########');
+ break
+ else
+ % Update weigth
+ end
+ stateSpace(1,1) = position(after);
+ stateSpace(2,1) = velocity(after);
+
+ %% Plot
+ figure(fig0);
+ clf(fig0)
+ hold on
+
+ max = h;
+ min = -1;
+ x_mountains = min: 0.01 : L;
+ y_mountains = zeros(length(x_mountains),1);
+ for i = 1:length(x_mountains)
+ y_mountains(i,1) = Profile(x_mountains(i),L,h);
+ end
+ plot(x_mountains, y_mountains);
+ y_point = Profile(position(after),L,h);
+ y_target = max;
+ scatter(position(after), y_point, 'filled');
+ scatter(L, y_target, '*');
+ ylim([0 max+1]);
+ xlim([min L+1]);
+end