Skip to content

Commit

Permalink
voting for pred
Browse files Browse the repository at this point in the history
  • Loading branch information
MegaJoctan committed Jan 17, 2024
1 parent c9edb77 commit edc16a1
Showing 1 changed file with 132 additions and 75 deletions.
207 changes: 132 additions & 75 deletions Ensemble/AdaBoost.mqh
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,29 @@
#include <MALE5\Linear Models\Logistic Regression.mqh>

//+------------------------------------------------------------------+
//| AdaBoost class |
//| AdaBoost class for Decision Tree |
//+------------------------------------------------------------------+
namespace CAdaBoostDecisionTree
namespace DecisionTree
{
class CAdaBoost
class AdaBoost
{

protected:
vector m_alphas;
matrix m_weights;
vector classes_in_data;
int m_random_state;
bool m_boostrapping;
uint m_min_split, m_max_split;

CDecisionTreeClassifier *weak_learners[]; //store weak_learner pointers for memory allocation tracking
CDecisionTreeClassifier *weak_learner;

CDecisionTreeClassifier models[]; //store model pointers for memory allocation tracking
CDecisionTreeClassifier model;

uint m_estimators;

public:
CAdaBoost(CDecisionTreeClassifier &base_model, uint n_estimators=50, int random_state=42);
~CAdaBoost(void);
AdaBoost(uint min_split, uint max_split, uint n_estimators=50, int random_state=42, bool bootstrapping=true);
~AdaBoost(void);

void fit(matrix &x, vector &y);
int predict(vector &x);
Expand All @@ -47,85 +51,111 @@ public:
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Builds the boosting ensemble container; weak learners themselves are
//--- created lazily inside fit(), one per boosting round.
//--- min_split/max_split : hyper-parameters forwarded to each CDecisionTreeClassifier
//--- n_estimators        : number of boosting rounds (and weak learners)
//--- random_state        : seed for the per-round data randomization
//--- bootstrapping       : whether Randomize() samples with replacement
AdaBoost::AdaBoost(uint min_split, uint max_split, uint n_estimators=50, int random_state=42, bool bootstrapping=true)
 :m_estimators(n_estimators),
  m_random_state(random_state),
  m_boostrapping(bootstrapping),
  m_min_split(min_split),
  m_max_split(max_split)
 {
   ArrayResize(weak_learners, n_estimators); //one pointer slot per boosting round, filled in fit()
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Frees every weak learner allocated during fit().
//--- Iterate the actual array size rather than m_estimators so the loop
//--- stays in bounds even if the array was resized after construction.
AdaBoost::~AdaBoost(void)
 {
   for (int i=0; i<ArraySize(weak_learners); i++)
     if (CheckPointer(weak_learners[i]) != POINTER_INVALID)
       delete(weak_learners[i]);
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Trains m_estimators decision-tree weak learners on randomized subsets
//--- of (x, y), computing an alpha weight for each from its weighted error.
//--- x : feature matrix (rows = instances), y : target vector.
//--- NOTE(review): the weight update exp(-alpha*y*preds) assumes class
//--- labels are encoded as -1/+1 — confirm against the callers.
void AdaBoost::fit(matrix &x,vector &y)
 {
   m_alphas.Resize(m_estimators);
   classes_in_data = MatrixExtend::Unique(y); //distinct target classes, reused by predict()

   ulong m = x.Rows();

   vector weights(m); weights = weights.Fill(1.0) / m; //uniform instance weights to start
   vector preds(m);
   vector misclassified(m);

//---

   matrix data = MatrixExtend::concatenate(x, y);
   matrix temp_data;

   matrix x_subset;
   vector y_subset;

   double error = 0;

   for (uint i=0; i<m_estimators; i++)
     {
       temp_data = data;
       MatrixExtend::Randomize(temp_data, this.m_random_state, this.m_boostrapping);

       if (!MatrixExtend::XandYSplitMatrices(temp_data, x_subset, y_subset)) //Get randomized subsets
         {
           printf("%s %d Failed to split data",__FUNCTION__,__LINE__);
           //--- fall back to the full data set instead of ArrayRemove+continue:
           //--- removing slot i shrank the array and left m_alphas[i] stale,
           //--- so later writes to weak_learners[i] could index out of range
           x_subset = x;
           y_subset = y;
         }

       //---

       weak_learner = new CDecisionTreeClassifier(this.m_min_split, m_max_split);

       weak_learner.fit(x_subset, y_subset);    //fit the i-th weak learner on this round's data
       preds = weak_learner.predict(x_subset);  //its in-sample predictions

       for (ulong j=0; j<m; j++)
         misclassified[j] = (preds[j] != y_subset[j]);

       error = (misclassified * weights).Sum() / (double)weights.Sum();

       //--- weight of this weak learner in the ensemble; epsilon guards log of zero

       double alpha = 0.5 * log((1-error) / (error + 1e-10));

       //--- re-weight instances: misclassified ones gain weight for the next round

       weights *= exp(-alpha * y_subset * preds);
       weights /= weights.Sum();

       //--- save the weak learner and its weight

       this.m_alphas[i] = alpha;
       this.weak_learners[i] = weak_learner;
     }
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Predicts the class of a single instance by alpha-weighted majority vote:
//--- each weak learner votes for its predicted class with weight m_alphas[i],
//--- and the class accumulating the most weight wins.
//--- (The previous code returned the one prediction maximizing alpha*pred,
//--- which is not AdaBoost's weighted vote.)
int AdaBoost::predict(vector &x)
 {
   vector weak_preds(m_estimators);

   for (uint i=0; i<this.m_estimators; i++)
     weak_preds[i] = this.weak_learners[i].predict(x);

   vector votes(classes_in_data.Size());
   votes.Fill(0.0);

   for (uint i=0; i<this.m_estimators; i++)
     for (ulong c=0; c<classes_in_data.Size(); c++)
       if (weak_preds[i] == classes_in_data[c])
         {
           votes[c] += this.m_alphas[i]; //each learner votes with its alpha weight
           break;
         }

   if (MQLInfoInteger(MQL_DEBUG))
     Print("votes: ",votes);

   return (int)classes_in_data[votes.ArgMax()]; //weighted-majority decision
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
vector CAdaBoost::predict(matrix &x)
vector AdaBoost::predict(matrix &x)
{
vector ret_v(x.Rows());
for (ulong i=0; i<ret_v.Size(); i++)
Expand All @@ -135,24 +165,30 @@ vector CAdaBoost::predict(matrix &x)
}
}

namespace CAdaBoostLogReg
//+------------------------------------------------------------------+
//| Adaboost for Logistic Regression |
//+------------------------------------------------------------------+

namespace LogisticRegression
{

class CAdaBoost
class AdaBoost
{

protected:
vector m_alphas;
matrix m_weights;
vector classes_in_data;
int m_random_state;
bool m_boostrapping;

CLogisticRegression *weak_learners[]; //store weak_learner pointers for memory allocation tracking
CLogisticRegression *weak_learner;

CLogisticRegression models[]; //store model pointers for memory allocation tracking
CLogisticRegression model;

uint m_estimators;

public:
CAdaBoost(CLogisticRegression &base_model, uint n_estimators=50, int random_state=42);
~CAdaBoost(void);
AdaBoost(CLogisticRegression &base_model, uint n_estimators=50, int random_state=42, bool bootstrapping=true);
~AdaBoost(void);

void fit(matrix &x, vector &y);
int predict(vector &x);
Expand All @@ -161,91 +197,112 @@ public:
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Builds the boosting ensemble container for logistic-regression learners.
//--- n_estimators  : number of boosting rounds (and weak learners)
//--- random_state  : seed for the per-round data randomization
//--- bootstrapping : whether Randomize() samples with replacement
//--- NOTE(review): base_model is accepted but never stored or used — confirm
//--- whether it should seed the per-round learners or be dropped from the API.
AdaBoost::AdaBoost(CLogisticRegression &base_model, uint n_estimators=50, int random_state=42, bool bootstrapping=true)
 :m_estimators(n_estimators),
  m_random_state(random_state),
  m_boostrapping(bootstrapping)
 {
   ArrayResize(weak_learners, n_estimators); //one pointer slot per boosting round, filled in fit()
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Frees every weak learner allocated during fit().
//--- Iterate the actual array size rather than m_estimators so the loop
//--- stays in bounds even if the array was resized after construction.
AdaBoost::~AdaBoost(void)
 {
   for (int i=0; i<ArraySize(weak_learners); i++)
     if (CheckPointer(weak_learners[i]) != POINTER_INVALID)
       delete(weak_learners[i]);
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Trains m_estimators logistic-regression weak learners on randomized
//--- subsets of (x, y), computing an alpha weight for each from its error.
//--- x : feature matrix (rows = instances), y : target vector.
//--- NOTE(review): the weight update exp(-alpha*y*preds) assumes class
//--- labels are encoded as -1/+1 — confirm against the callers.
void AdaBoost::fit(matrix &x,vector &y)
 {
   m_alphas.Resize(m_estimators);
   classes_in_data = MatrixExtend::Unique(y); //distinct target classes, reused by predict()

   ulong m = x.Rows();

   vector weights(m); weights = weights.Fill(1.0) / m; //uniform instance weights to start
   vector preds(m);
   vector misclassified(m);

//---

   matrix data = MatrixExtend::concatenate(x, y);
   matrix temp_data;

   matrix x_subset;
   vector y_subset;

   double error = 0;

   for (uint i=0; i<m_estimators; i++)
     {
       temp_data = data;
       MatrixExtend::Randomize(temp_data, this.m_random_state, this.m_boostrapping);

       if (!MatrixExtend::XandYSplitMatrices(temp_data, x_subset, y_subset)) //Get randomized subsets
         {
           printf("%s %d Failed to split data",__FUNCTION__,__LINE__);
           //--- fall back to the full data set instead of ArrayRemove+continue:
           //--- removing slot i shrank the array and left m_alphas[i] stale,
           //--- so later writes to weak_learners[i] could index out of range
           x_subset = x;
           y_subset = y;
         }

       //---

       //--- was missing: fit() was previously called on an unallocated pointer
       //--- NOTE(review): confirm CLogisticRegression's constructor arguments
       weak_learner = new CLogisticRegression();

       weak_learner.fit(x_subset, y_subset);    //fit the i-th weak learner on this round's data
       preds = weak_learner.predict(x_subset);  //its in-sample predictions

       for (ulong j=0; j<m; j++)
         misclassified[j] = (preds[j] != y_subset[j]);

       error = (misclassified * weights).Sum() / (double)weights.Sum();

       //--- weight of this weak learner in the ensemble; epsilon guards log of zero

       double alpha = 0.5 * log((1-error) / (error + 1e-10));

       //--- re-weight instances: misclassified ones gain weight for the next round

       weights *= exp(-alpha * y_subset * preds);
       weights /= weights.Sum();

       //--- save the weak learner and its weight

       this.m_alphas[i] = alpha;
       this.weak_learners[i] = weak_learner;
     }
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Predicts the class of a single instance by alpha-weighted majority vote:
//--- each weak learner votes for its predicted class with weight m_alphas[i],
//--- and the class accumulating the most weight wins.
//--- (The previous code returned classes_in_data[Unique_count(classes_in_data).ArgMax()],
//--- a value independent of the weak learners' predictions — every class is
//--- unique in classes_in_data, so the learners' output was ignored entirely.)
int AdaBoost::predict(vector &x)
 {
   vector weak_preds(m_estimators);

   for (uint i=0; i<this.m_estimators; i++)
     weak_preds[i] = this.weak_learners[i].predict(x);

   vector votes(classes_in_data.Size());
   votes.Fill(0.0);

   for (uint i=0; i<this.m_estimators; i++)
     for (ulong c=0; c<classes_in_data.Size(); c++)
       if (weak_preds[i] == classes_in_data[c])
         {
           votes[c] += this.m_alphas[i]; //each learner votes with its alpha weight
           break;
         }

   return (int)classes_in_data[votes.ArgMax()]; //weighted-majority decision
 }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
//--- Predicts a class for every row of x by delegating to predict(vector).
//--- Copies each row into a named vector first: predict() takes a non-const
//--- reference, which must not bind to the temporary returned by x.Row(i).
vector AdaBoost::predict(matrix &x)
 {
   vector ret_v(x.Rows());
   for (ulong i=0; i<ret_v.Size(); i++)
     {
       vector row = x.Row(i);
       ret_v[i] = this.predict(row);
     }

   return ret_v;
 }
}
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+

0 comments on commit edc16a1

Please sign in to comment.