Commit 144f50f: added grid search back

ginesiametlle committed May 23, 2018
1 parent: 382574f
Showing 9 changed files with 209 additions and 114 deletions.
10 changes: 8 additions & 2 deletions README.md
@@ -2,9 +2,9 @@

### About this repository

-An easy to use universal semantic tagger.
+This repository provides an easy-to-use universal semantic tagger.

-A recent version of Python 3 with the packages listed in [requirements.txt](./requirements.txt) are expected.
+A recent version of Python 3 with the packages listed in [requirements.txt](./requirements.txt) is expected.

### Training a neural model

@@ -24,6 +24,12 @@ One can edit [config.sh](./config.sh) for fine control over the employed feature

Note that trained models are stored/loaded using the directory defined in [config.sh](./config.sh) when the ```--model``` option is not provided.

+### Comments
+
+It is advisable to run a [_tokenizer_](https://gmb.let.rug.nl/elephant/about.php) on your data.
+
+If you have the means to identify multiword expressions, you can represent them as a single token using tildes/hyphens, as in ```ice~cream``` or ```ice-cream```.
+
### References

1. L. Abzianidze and J. Bos. [_Towards Universal Semantic Tagging_](https://www.aclweb.org/anthology/W17-6901). In Proceedings of the 12th International Conference on Computational Semantics (IWCS) - Short papers. Association for Computational Linguistics, 2017.
8 changes: 0 additions & 8 deletions config.sh
@@ -180,11 +180,3 @@ MODEL_BATCH_NORMALIZATION=0
# keras verbosity mode (int, default: 1)
MODEL_VERBOSE=1

-#################
-## OTHER TOOLS ##
-#################
-
-# root directory where to find the Elephant tokenizer (RuG)
-# it will be downloaded automatically when missing
-ELEPHANT_DIR="${DIR_TOOLS}/elephant"
-
53 changes: 43 additions & 10 deletions models/metrics.py
@@ -1,30 +1,63 @@
#!/usr/bin/python3
# this script implements evaluation metrics

import sys
import tensorflow as tf
from keras import backend as K


-def strict_accuracy(act, pred):
+def strict_accuracy_K(act, pred):
    """
-    Computes accuracy for each batch without factoring in padding symbols
+    Keras metric that computes the accuracy of tagged sentences for each batch
+    Positions where the actual categorical vector is [1 0 0 ... 0] (padding) are not factored in
    Inputs:
        - act: array of actual categorical vectors
        - pred: array of predicted categorical vectors
    Outputs:
        - accuracy score
    """
-    # values of actual classes
+    # numerical values of the actual classes
    act_argm = K.argmax(act, axis=-1)
-    # values of predicted classes
+    # numerical values of the predicted classes
    pred_argm = K.argmax(pred, axis=-1)
-    # determines where the tags are incorrect (1) or not (0)
+    # determines where the classes are incorrect or not
    incorrect = K.cast(K.not_equal(act_argm, pred_argm), dtype='float32')
-    # determines where the tags are correct (1) or not (0)
+    # determines where the classes are correct or not
    correct = K.cast(K.equal(act_argm, pred_argm), dtype='float32')
-    # determines where the tag is a padding tag (1) or not (0)
+    # determines where the classes are ignored or not
    padding = K.cast(K.equal(act_argm, 0), dtype='float32')
    # subtract padding from correct predictions and check equality to 1
    corr_preds = K.sum(K.cast(K.equal(correct - padding, 1), dtype='float32'))
    incorr_preds = K.sum(K.cast(K.equal(incorrect - padding, 1), dtype='float32'))
-    total = corr_preds + incorr_preds
+    total_preds = corr_preds + incorr_preds
    # actual accuracy without padding
-    accuracy = corr_preds / total
+    accuracy = corr_preds / total_preds
    return accuracy
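
The subtraction is the subtle part of the metric above: a position only counts as a correct prediction when `correct - padding == 1`, i.e. the predicted class matches and the actual class is not the padding class 0. A minimal numpy sketch of the same arithmetic (illustrative only, not part of this commit):

```python
import numpy as np

act_argm  = np.array([0, 0, 3, 5, 2])   # class 0 marks padding positions
pred_argm = np.array([0, 1, 3, 4, 2])

correct   = (act_argm == pred_argm).astype('float32')  # [1, 0, 1, 0, 1]
incorrect = (act_argm != pred_argm).astype('float32')  # [0, 1, 0, 1, 0]
padding   = (act_argm == 0).astype('float32')          # [1, 1, 0, 0, 0]

corr_preds = np.sum(correct - padding == 1)      # 2 (positions 2 and 4)
incorr_preds = np.sum(incorrect - padding == 1)  # 1 (position 3; position 1 is padding)
print(corr_preds / (corr_preds + incorr_preds))  # 0.666..., 2 correct of 3 non-padding
```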


+def strict_accuracy_N(act, pred, ignore_class=0):
+    """
+    Computes the accuracy of an array of tagged sentences
+    Actual values which match `ignore_class` are not factored in
+    Inputs:
+        - act: array of actual numerical vectors
+        - pred: array of predicted numerical vectors
+        - ignore_class: numerical value to be ignored
+    Outputs:
+        - accuracy score
+    """
+    # number of correct predictions
+    corr_preds = 0
+    # number of predictions
+    total_preds = 0
+    # compute values by iterating over sentences
+    for act_classes, pred_classes in zip(act, pred):
+        for t in range(len(act_classes)):
+            if act_classes[t] != ignore_class:
+                total_preds += 1
+                if pred_classes[t] == act_classes[t]:
+                    corr_preds += 1
+    # actual accuracy without padding
+    return corr_preds / total_preds
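
For padded, equal-length sequences the same score can be computed without the explicit Python loops. A vectorized numpy sketch that should behave like the function above (`strict_accuracy_N_vec` is a hypothetical name, not part of this commit):

```python
import numpy as np

def strict_accuracy_N_vec(act, pred, ignore_class=0):
    """Vectorized equivalent of strict_accuracy_N for rectangular arrays"""
    act = np.asarray(act)
    pred = np.asarray(pred)
    # mask out positions whose actual class is the ignored (padding) class
    mask = act != ignore_class
    return np.sum((act == pred) & mask) / np.sum(mask)
```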

24 changes: 6 additions & 18 deletions models/nnmodels.py → models/nn.py
@@ -1,16 +1,14 @@
#!/usr/bin/python3
# this script defines the structure of possible neural models

-from copy import deepcopy
-
from keras.models import Model, Input
from keras.layers import Dense, Reshape, Conv2D, LeakyReLU, LSTM, GRU
from keras.layers import add, concatenate
from keras.layers import Embedding, BatchNormalization, Dropout, GaussianNoise
from keras.layers import TimeDistributed, Bidirectional
from keras_contrib.layers import CRF

-from models.metrics import strict_accuracy
+from models.metrics import strict_accuracy_K
from utils.keras_mapper import get_optimizer, get_loss


@@ -53,7 +51,7 @@ def get_layer(args, num_units):
    return None


-def get_model(base_args, num_tags=0, max_slen=0, num_words=0, wemb_dim=0, wemb_matrix=None, max_wlen=0, num_chars=0, cemb_dim=0, cemb_matrix=None, optimizer=None, dropout=None, model_size=None, num_layers=None):
+def get_model(args, num_tags=0, max_slen=0, num_words=0, wemb_dim=0, wemb_matrix=None, max_wlen=0, num_chars=0, cemb_dim=0, cemb_matrix=None):
"""
Obtains a neural model as a combination of layers
Inputs:
Expand All @@ -70,17 +68,6 @@ def get_model(base_args, num_tags=0, max_slen=0, num_words=0, wemb_dim=0, wemb_m
Returns:
the compiled Keras neural model defined by the command line arguments
"""
-    ## REDEFINE BASE PARAMETERS
-    args = deepcopy(base_args)
-    if optimizer:
-        args.optimizer = optimizer
-    if dropout:
-        args.dropout = dropout
-    if model_size:
-        args.model_size = model_size
-    if num_layers:
-        args.num_layers = num_layers
-
    ## DEFINE NETWORK
    if args.use_words:
        # word input layer
@@ -113,7 +100,8 @@ def get_model(base_args, num_tags=0, max_slen=0, num_words=0, wemb_dim=0, wemb_m
    if args.batch_normalization:
        x = BatchNormalization()(x)
    x = LeakyReLU()(x)
-    if args.dropout > 0:
+
+    if args.dropout:
        x = Dropout(args.dropout)(x)

    x = Conv2D(max_slen, kernel_size=(3, 3), padding='same', data_format='channels_first')(x)
@@ -188,9 +176,9 @@ def get_model(base_args, num_tags=0, max_slen=0, num_words=0, wemb_dim=0, wemb_m
    # define metrics
    # we employ Keras default accuracy and our strict accuracy metric
    if args.output_activation == 'crf':
-        model_metrics = [crf.accuracy, strict_accuracy]
+        model_metrics = [crf.accuracy, strict_accuracy_K]
    else:
-        model_metrics = ['accuracy', strict_accuracy]
+        model_metrics = ['accuracy', strict_accuracy_K]

    # define optimizer
    model_opt = get_optimizer(args.optimizer)
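
With this commit, hyper-parameter overrides no longer travel through the `get_model` signature; callers set them on a copy of the parsed arguments before building the model. A sketch of the new calling convention, using the variable names prepared in models/optimizer.py (the override values here are placeholders):

```python
from copy import deepcopy
from models.nn import get_model

# base_args as returned by models.argparser.get_args()
args = deepcopy(base_args)
args.optimizer = 'adam'   # overrides are now plain attribute assignments
args.dropout = 0.1
args.model_size = 200
args.num_layers = 1

# get_model only receives the data-dependent dimensions and matrices
model = get_model(args, num_tags,
                  max_slen, num_words, wemb_dim, wemb_matrix,
                  max_wlen, num_chars, cemb_dim, cemb_matrix)
```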
118 changes: 118 additions & 0 deletions models/optimizer.py
@@ -0,0 +1,118 @@

import sys
import numpy as np
from copy import deepcopy
from collections import OrderedDict
from models.nn import get_model
from models.metrics import strict_accuracy_N


def grid_search_params(base_args, cv_samples, X, y, padding_y, num_tags, max_slen, num_words, wemb_dim, wemb_matrix, max_wlen, num_chars, cemb_dim, cemb_matrix):
    args = deepcopy(base_args)
    # define the possible values of each model hyper-parameter
    grid_params = OrderedDict()
    #grid_params['epochs'] = [20, 30, 40]
    #grid_params['batch_size'] = [512, 1024, 2048]
    #grid_params['optimizer'] = ['rmsprop', 'adam', 'nadam']
    #grid_params['dropout'] = [0.1, 0.2, 0.3]
    #grid_params['model_size'] = [200, 300, 400]
    #grid_params['num_layers'] = [1, 2, 3]

    grid_params['epochs'] = [2, 3]
    grid_params['batch_size'] = [512]
    grid_params['optimizer'] = ['adam']
    grid_params['dropout'] = [0.1]
    grid_params['model_size'] = [200]
    grid_params['num_layers'] = [1]

    # define the parameter combinations
    grid_space = np.array(np.meshgrid(grid_params['epochs'],
                                      grid_params['batch_size'],
                                      grid_params['optimizer'],
                                      grid_params['dropout'],
                                      grid_params['model_size'],
                                      grid_params['num_layers'])).T.reshape(-1, len(grid_params))

    print([grid_params[x] for x in grid_params.keys()])
    print(grid_space)

    # perform 3-fold cross-validation for each parameter combination
    print('[INFO] Grid-search will optimize the following hyper-parameters:', list(grid_params.keys()))
    cv_samples = 3

    X_cv_train = X
    if args.use_words and args.use_chars:
        block_size = len(X[0]) // cv_samples
        X_cv_dev = [[], []]
    else:
        block_size = len(X) // cv_samples
        X_cv_dev = []
    y_cv_dev = []
    y_cv_train = y

    best_acc = 0
    best_params = None

    # test each parameter combination
    for i in range(grid_space.shape[0]):

        current_acc = 0
        cell = grid_space[i]
        args.optimizer = cell[2]
        args.dropout = float(cell[3])
        args.model_size = int(cell[4])
        args.num_layers = int(cell[5])

        model = get_model(args, num_tags,
                          max_slen, num_words, wemb_dim, wemb_matrix,
                          max_wlen, num_chars, cemb_dim, cemb_matrix)

        print('[INFO] ' + str(cv_samples) + '-fold cross-validation using the hyper-parameter set', cell)
        for _ in range(cv_samples):
            # rotate the block used for validation
            if args.use_words and args.use_chars:
                if len(X_cv_dev[0]):
                    X_cv_train = [np.append(X_cv_train[0], X_cv_dev[0], axis=0), np.append(X_cv_train[1], X_cv_dev[1], axis=0)]
                X_cv_dev = [X_cv_train[0][:block_size], X_cv_train[1][:block_size]]
                X_cv_train = [X_cv_train[0][block_size:], X_cv_train[1][block_size:]]

            else:
                if len(X_cv_dev):
                    X_cv_train = np.append(X_cv_train, X_cv_dev, axis=0)
                X_cv_dev = X_cv_train[:block_size]
                X_cv_train = X_cv_train[block_size:]

            if len(y_cv_dev):
                y_cv_train = np.append(y_cv_train, y_cv_dev, axis=0)
            y_cv_dev = y_cv_train[:block_size]
            y_cv_train = y_cv_train[block_size:]

            # fit the model
            history = model.fit(X_cv_train, np.array(y_cv_train), batch_size=int(cell[1]), epochs=int(cell[0]), validation_split=0.0, verbose=0)

            # obtain accuracy on the held-out validation block
            p_cv_dev = model.predict(X_cv_dev, verbose=0)
            p_cv_dev = np.argmax(p_cv_dev, axis=-1) + 1
            true_cv_dev = np.argmax(y_cv_dev, axis=-1) + 1
            current_acc += strict_accuracy_N(true_cv_dev, p_cv_dev, 1)

        # average the accuracy over the cross-validation folds
        current_acc = current_acc / cv_samples
        print('[INFO] The accuracy with the given hyper-parameters is', current_acc)
        if current_acc > best_acc:
            best_acc = current_acc
            best_params = cell

    print('[INFO] The best set of hyper-parameters found is', best_params)

    # plug the best parameters back into args
    args.epochs = int(best_params[0])
    args.batch_size = int(best_params[1])
    args.optimizer = best_params[2]
    args.dropout = float(best_params[3])
    args.model_size = int(best_params[4])
    args.num_layers = int(best_params[5])
    return args
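
One detail of `grid_search_params` worth noting: stacking the `np.meshgrid` outputs with `np.array` coerces the mixed-type parameter lists to a common dtype, so every cell of `grid_space` ends up as a string, which is why values are cast back with `int()`/`float()` before use. A standalone sketch of the construction (illustrative only, with a reduced parameter set):

```python
import numpy as np
from collections import OrderedDict

grid_params = OrderedDict()
grid_params['epochs'] = [2, 3]
grid_params['batch_size'] = [512]
grid_params['optimizer'] = ['adam']

# each row of grid_space is one hyper-parameter combination
grid_space = np.array(np.meshgrid(*grid_params.values())).T.reshape(-1, len(grid_params))
print(grid_space)
# [['2' '512' 'adam']
#  ['3' '512' 'adam']]   <- strings, hence the int()/float() casts downstream
```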

2 changes: 1 addition & 1 deletion models/semtagger_apply.py
@@ -12,7 +12,7 @@

from models.argparser import get_args
from models.loader import load_conll_notags, make_char_seqs
-from models.nnmodels import get_model
+from models.nn import get_model

from utils.input2feats import wordsents2sym, charsents2sym
