Skip to content

Commit

Permalink
added logic to feature_modeling & fold cls
Browse files Browse the repository at this point in the history
  • Loading branch information
Injiri committed Mar 12, 2019
1 parent ee96aa3 commit 15ebe50
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
1 change: 0 additions & 1 deletion logic/feature_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,3 @@ def count_grams(headline, body):




39 changes: 38 additions & 1 deletion logic/n_kfold.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,41 @@ def generate_features(stances, dataset, name ):
for stance in stances:
y.append(LABELS.index(stance['Stance']))
h.append(stance['Headline'])
b.append(dataset.articles[stance['Body id']])
b.append(dataset.articles[stance['Body id']])



#Generate folds
if __name__ == "__main__":
    # Project start-up helpers; both are defined elsewhere in the project.
    versioning()
    parameter_parser()

    # Load the training dataset and split it into 10 folds plus a hold-out set.
    my_dataset = Datasets()
    folds, hold_out = kfold_split(my_dataset, n_folds=10)
    fold_stances, hold_out_stances = get_stances_4_folds(my_dataset, folds, hold_out)

    # Load the demo dataset and build its features.
    # NOTE(review): the dataset name contains a space ("Demo /test") -- confirm
    # it matches the actual location expected by Datasets.
    demo_dateset = Datasets("Demo /test")
    X_demo, Y_demo = generate_features(demo_dateset.stances, demo_dateset, "demo")

    Xs = dict()
    Ys = dict()

    # Populate all features up front.
    X_holdout, y_holdout = generate_features(hold_out_stances, my_dataset, "holder")
    for fold in fold_stances:
        # The fold stances were drawn from my_dataset, and generate_features
        # resolves each stance's 'Body id' in the dataset it is given, so the
        # training dataset (not the demo dataset) must be passed here.
        Xs[fold], Ys[fold] = generate_features(fold_stances[fold], my_dataset, str(fold))

    best_score = 0
    best_fold = None

    # Leave-one-fold-out: train on every fold except `fold`, test on `fold`.
    for fold in fold_stances:
        ids = list(range(len(folds)))
        del ids[fold]

        # Feature matrices are stacked row-wise; label vectors are joined
        # end-to-end. np.hstack (unlike np.stack) accepts folds of different
        # sizes and keeps the labels one-dimensional.
        X_train = np.vstack(tuple(Xs[i] for i in ids))
        Y_train = np.hstack(tuple(Ys[i] for i in ids))

        # Use dedicated names for the held-out fold so the demo-set
        # features/labels (X_demo, Y_demo) computed above are not overwritten.
        X_test = Xs[fold]
        Y_test = Ys[fold]


0 comments on commit 15ebe50

Please sign in to comment.