Commit c162bed3 authored by mjboos

recen

parent 964dfb3b
@@ -137,12 +137,39 @@ def hacky_load_LSTM():
    model.load_weights('300_fasttext_LSTM_best.hdf5')
    return model

def keras_json_multi_to_one(model_json):
    model_dict = json.loads(model_json)
    model_dict['config']['layers'][-1]['config']['units'] = 1
    return json.dumps(model_dict)

def hacky_load_weights(model_name, model, i=None):
    import re
    from keras.models import model_from_json
    model_path = '../model_specs/{}.json'.format(model_name)
    with tf.device('/cpu:0'):
        with open(model_path, 'r') as fl:
            old_model_json = json.load(fl)
        if i is not None:
            old_model_json = keras_json_multi_to_one(old_model_json)
            best_weights_path="{}_finetune_{}_best.hdf5".format(model_name, i)
        else:
            best_weights_path="{}_best.hdf5".format(model_name)
        old_model = model_from_json(old_model_json)
        old_model.load_weights(best_weights_path)
        weights = [layer.get_weights() for layer in old_model.layers]
    for weight_i, weights_old in enumerate(weights):
        if weight_i >= 2:
            model.layers[weight_i].set_weights(weights_old)

def transfer_weights_multi_to_one(weights, model, i):
    for weights_old, layer in zip(weights[2:-1], model.layers[2:-1]):
        layer.set_weights(weights_old)
    # now for the last layer
    model.layers[-1].set_weights([weights[-1][0][:,i][:,None], weights[-1][1][i][None]])

def get_weights(model):
    return [layer.get_weights() for layer in model.layers]

def change_trainable(layer, trainable, verbose=False):
    """ Helper method that fixes some of Keras' issues with wrappers and
    trainability. Freezes or unfreezes a given layer.
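A minimal, self-contained sketch of what the two new helpers do, using hand-written stand-ins for the real Keras config and weight arrays (the fake config, shapes, and index below are assumptions for illustration only):

import json
import numpy as np

# stand-in for model.to_json(): last layer is a 6-unit sigmoid head
fake_model_json = json.dumps(
    {'config': {'layers': [{'config': {'units': 64}},
                           {'config': {'units': 6}}]}})
one_unit = json.loads(keras_json_multi_to_one(fake_model_json))
assert one_unit['config']['layers'][-1]['config']['units'] == 1

# the final-layer slicing used by transfer_weights_multi_to_one: keep only
# column i of the (hidden, 6) kernel and entry i of the (6,) bias
hidden, i = 4, 2
kernel = np.zeros((hidden, 6), dtype=np.float32)
bias = np.zeros(6, dtype=np.float32)
single_output_weights = [kernel[:, i][:, None], bias[i][None]]
assert single_output_weights[0].shape == (hidden, 1)
assert single_output_weights[1].shape == (1,)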
@@ -279,11 +306,11 @@ def simple_huge_aux_net(trainable=False, prune=True):
    return model_params

def simple_huge_1_layer_net(trainable=False, prune=True):
    model_func = partial(models.RNN_conc, rnn_func=keras.layers.CuDNNGRU, no_rnn_layers=1, hidden_rnn=64, hidden_dense=32)
    model_func = partial(models.RNN_conc, rnn_func=keras.layers.LSTM, no_rnn_layers=1, hidden_rnn=96, hidden_dense=None)
    model_params = {
        'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 500,
        'embedding_dim' : 300, 'trainable' : trainable, 'prune' : prune,
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.0005, 'clipnorm' : 1.}, 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.0005, 'clipnorm' : 1., 'clipvalue':1., 'beta_2':0.99}, 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
    return model_params

def simple_huge_net(trainable=False, prune=True):
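These model_params factories now store the optimizer as a class plus keyword arguments ('optimizer_func' / 'optimizer_args') instead of a ready-made instance. A short sketch of how that pair is meant to be turned into an optimizer before compiling, mirroring the pop-and-instantiate pattern used in the training script (variable names here are illustrative):

params = simple_huge_1_layer_net(prune=True)
opt_func = params['compilation_args'].pop('optimizer_func')      # e.g. optimizers.Adam
opt_args = params['compilation_args'].pop('optimizer_args')      # e.g. {'lr': 0.0005, 'clipnorm': 1., ...}
params['compilation_args']['optimizer'] = opt_func(**opt_args)   # compilation_args is now ready for model.compile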
@@ -295,11 +322,11 @@ def simple_huge_net(trainable=False, prune=True):
    return model_params

def simple_huge_dropout_net(trainable=False, prune=True):
    model_func = partial(models.RNN_dropout_conc, rnn_func=keras.layers.CuDNNGRU, no_rnn_layers=2, hidden_rnn=96)
    model_func = partial(models.RNN_dropout_conc, rnn_func=keras.layers.CuDNNGRU, no_rnn_layers=2, hidden_rnn=96, hidden_dense=None)
    model_params = {
        'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 500,
        'embedding_dim' : 300, 'trainable' : trainable, 'prune' : prune,
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.001, 'clipnorm' : 1.}, 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.001, 'clipnorm' : 1., 'clipvalue':1., 'beta_2':0.99}, 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
    return model_params

def simple_small_trainable_net(trainable=False, prune=True):
@@ -319,11 +346,11 @@ def simple_net(trainable=False, prune=True):
    return model_params

def shallow_CNN(trainable=False, prune=True):
    model_func = partial(models.CNN_shallow, n_filters=50, kernel_sizes=[3,4,5], dropout=0.5)
    model_func = partial(models.CNN_shallow, n_filters=96, kernel_sizes=[3,4,5], dropout=0.3, dropout_embed=0.3, act='relu')
    model_params = {
        'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 500,
        'embedding_dim' : 300, 'trainable' : trainable, 'prune' : prune,
        'compilation_args' : {'optimizer' : optimizers.Adam(lr=0.001, clipvalue=1., clipnorm=1.), 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.0005, 'clipnorm' : 1., 'clipvalue' : 1.}, 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
    return model_params

def add_net():
@@ -342,4 +369,9 @@ def old_gru_net(trainable=False, prune=True):
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.001, 'clipnorm' : 1., 'beta_2' : 0.99}, 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
    return model_params

def just_length():
    model_params = {
        'max_features' : 500000, 'maxlen' : 300,
        'embedding_dim' : 300, 'trainable' : False, 'prune' : True,
        'compilation_args' : {'optimizer_func' : optimizers.Adam, 'optimizer_args' : {'lr' : 0.001, 'clipnorm' : 1., 'beta_2' : 0.99}, 'loss':'binary_crossentropy', 'loss_weights' : [1.]}}
    return model_params
@@ -52,28 +52,32 @@ if __name__=='__main__':
    weight_tensor = tf.convert_to_tensor(class_weights, dtype=tf.float32)
    loss = partial(models.weighted_binary_crossentropy, weights=weight_tensor)
    loss.__name__ = 'weighted_binary_crossentropy'
    model_params = simple_huge_net(prune=True)
    # model_params = simple_huge_net(prune=True, trainable=True)
    model_params = shallow_CNN()
    # model_params['compilation_args']['loss']['main_output'] = models.roc_auc_score
    model_name = 'no_clipping'
    model_name = 'shallow_CNN'
    not_toxic_bnz = np.logical_and(np.logical_not(train_y[:,0]), train_y[:,1:].any(axis=1))
    frozen_tokenizer = pre.KerasPaddingTokenizer(max_features=model_params['max_features'], maxlen=model_params['maxlen'])
    frozen_tokenizer.fit(pd.concat([train_text, test_text]))
    # train_text = pd.concat([train_text[not_toxic_bnz]]*5+[train_text])
    # train_y = np.vstack([train_y[not_toxic_bnz]]*5 + [train_y])
    # list_of_tokens = frozen_tokenizer.tokenizer.texts_to_sequences(pd.concat([train_text, test_text]))
    embedding = hlp.get_glove_embedding('../crawl-300d-2M.vec')
    embedding = hlp.get_fasttext_embedding('../crawl-300d-2M.vec')
    # embedding = hlp.get_glove_embedding('../glove.twitter.27B.200d.txt')
    opt = model_params['compilation_args'].pop('optimizer_func')
    optargs = model_params['compilation_args'].pop('optimizer_args')
    optargs['lr'] = 0.0005
    model_params['compilation_args']['optimizer'] = opt(**optargs)
    # model = fit_model(model_name, fit_args, {'main_input':train_text}, {'main_output': train_y, 'aux_output' : aux_task}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
    # model = load_full_model(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
    # hlp.write_model(model.predict({'main_input':test_text})[0])
    # hlp.write_model(model.predict({'main_input':test_text}))
    # hlp.make_training_set_preds(model, {'main_input':train_text}, train_y)
    model = models.Embedding_Blanko_DNN(tokenizer=frozen_tokenizer, embedding=embedding, **model_params)
    # old_model.load_weights(model_name+'_best.hdf5')
    ## old_model.load_weights(model_name+'_best.hdf5')
    lrfinder = lrf.LRFinder(model.model)
    train_x = frozen_tokenizer.transform(train_text)
    lrfinder.find(train_x, train_y, 0.001, 0.05, batch_size=80, epochs=1)
    # lrfinder.losses = [np.log(loss) for loss in lrfinder.losses]
    ## lrfinder.losses = [np.log(loss) for loss in lrfinder.losses]
    joblib.dump([lrfinder.losses, lrfinder.lrs], '{}.pkl'.format(model_name))
    lrfinder.plot_loss()
    plt.savefig('loss_{}.svg'.format(model_name))
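The losses and learning rates recorded by the LR finder above are dumped to '<model_name>.pkl'; a small sketch of reloading that dump later for inspection (the file name assumes model_name = 'shallow_CNN', as set in this script):

import joblib
import matplotlib.pyplot as plt

losses, lrs = joblib.load('shallow_CNN.pkl')   # the [losses, lrs] list dumped above
plt.plot(lrs, losses)
plt.xscale('log')
plt.xlabel('learning rate')
plt.ylabel('loss')
plt.savefig('lr_curve_shallow_CNN.svg')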
@@ -81,7 +85,7 @@ if __name__=='__main__':
    lrfinder.plot_loss_change()
    plt.savefig('loss_change_{}.svg'.format(model_name))
    plt.close()
    #
    # model = load_full_model(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
    # SHUFFLE TRAINING SET so validation split is different every time
    # row_idx = np.arange(0, train_text.shape[0])
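A sketch of the shuffle hinted at in the commented lines above, so that Keras' validation_split cuts a different validation slice on each run (it assumes train_text is a pandas Series and train_y a numpy array of matching length):

row_idx = np.arange(0, train_text.shape[0])
np.random.shuffle(row_idx)
train_text = train_text.iloc[row_idx]
train_y = train_y[row_idx]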
......
@@ -64,7 +64,7 @@ def norm_rank(arr):
def make_weight_matrix(y_mat, weights):
    return np.tile(weights[None], (y_mat.shape[0], 1))

def write_model(predictions, correct=None,
def write_model(predictions, correct=None, name='',
                cols=['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']):
    import pandas as pd
    import time
@@ -74,7 +74,7 @@ def write_model(predictions, correct=None,
    subm = pd.read_csv('../input/sample_submission.csv')
    submid = pd.DataFrame({'id': subm["id"]})
    submission = pd.concat([submid, pd.DataFrame(predictions, columns=cols)], axis=1)
    submission.to_csv('../submissions/submission_{}.csv'.format(timestr), index=False)
    submission.to_csv('../submissions/submission_{}_{}.csv'.format(name, timestr), index=False)

def logit(x):
    x[x==1.] -= np.finfo(np.float32).eps
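write_model now takes a name argument that is embedded in the submission file name. A minimal usage sketch (the prediction array below is a random stand-in; the function itself still reads ../input/sample_submission.csv for the ids):

import numpy as np

fake_predictions = np.random.rand(10, 6)        # stand-in for real class probabilities
write_model(fake_predictions, name='shallow_CNN')
# -> ../submissions/submission_shallow_CNN_<timestamp>.csv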
@@ -93,7 +93,8 @@ def cross_val_score_with_estimators(classifier_func, X, y, cv=6, scoring=None):
        clf = classifier_func().fit(X[train], y[train])
        scores.append(scoring(y[test], clf.predict_proba(X[test])[:,1]))
        estimators.append(clf)
    return scores, estimators
    final_estimator = classifier_func().fit(X, y)
    return scores, estimators, final_estimator

def sparse_to_dense(X):
    return X.toarray()
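cross_val_score_with_estimators now additionally returns an estimator refit on the full data. A usage sketch with scikit-learn stand-ins (the classifier, data, and scoring below are illustrative assumptions):

import numpy as np
from functools import partial
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

X = np.random.rand(200, 5)
y = (np.random.rand(200) > 0.5).astype(int)
scores, estimators, final_estimator = cross_val_score_with_estimators(
    partial(LogisticRegression, solver='liblinear'), X, y, cv=3, scoring=roc_auc_score)
# estimators holds one fitted classifier per fold; final_estimator is fit on all of X, y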
......
@@ -21,7 +21,7 @@ from keras.utils import to_categorical
from keras.layers import Dense, Input, GlobalMaxPooling1D
import tensorflow as tf
from keras import backend
from keras.layers import Conv1D, MaxPooling1D, Embedding, Reshape, Activation, Lambda
from keras.layers import Conv1D, MaxPooling1D, Embedding, Reshape, Activation, Lambda, SpatialDropout1D
from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras import optimizers
@@ -36,6 +36,7 @@ import string
import json
import enchant
import copy
import DNN
from keras.engine.topology import Layer
import keras.backend as K
from keras import initializers
@@ -296,12 +297,13 @@ def make_embedding_layer(embedding_matrix, maxlen=200, l2=1e-6, trainable=False,
    # note that we set trainable = False so as to keep the embeddings fixed
    from keras.regularizers import L1L2
    embed_reg = L1L2(l2=l2) if l2 != 0 and trainable else None
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                embeddings_regularizer=embed_reg,
                                input_length=maxlen,
                                trainable=trainable)
    with tf.device('/cpu:0'):
        embedding_layer = Embedding(embedding_matrix.shape[0],
                                    embedding_matrix.shape[1],
                                    weights=[embedding_matrix],
                                    embeddings_regularizer=embed_reg,
                                    input_length=maxlen,
                                    trainable=trainable)
    return embedding_layer
def pop_meta_features(tokenizer):
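A small sketch of calling make_embedding_layer with a random stand-in matrix, mainly to make the expected shapes explicit (the real callers pass the pruned fastText/GloVe matrix loaded elsewhere):

import numpy as np

vocab_size, embedding_dim, maxlen = 1000, 300, 500
fake_matrix = np.random.rand(vocab_size, embedding_dim).astype('float32')
embedding_layer = make_embedding_layer(fake_matrix, maxlen=maxlen, trainable=False)
# maps integer sequences of shape (batch, 500) to embeddings of shape (batch, 500, 300)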
@@ -395,7 +397,7 @@ class EmbeddingSemiTrainable(Layer):
class Embedding_Blanko_DNN(BaseEstimator):
    def __init__(self, embedding=None, max_features=20000, model_function=None, tokenizer=None, n_out=6, meta_features=True,
                 maxlen=300, embedding_dim=300, trainable=False, prune=True, augment_data=False, list_of_tokens=None,
                 maxlen=300, embedding_dim=300, trainable=False, prune=True, augment_data=False, list_of_tokens=None, config=False,
                 compilation_args={'optimizer':'adam','loss':'binary_crossentropy','metrics':['accuracy']}, embedding_args={'n_components' : 100}):
        self.compilation_args = compilation_args
        self.max_features = max_features
@@ -438,7 +440,16 @@ class Embedding_Blanko_DNN(BaseEstimator):
                                              trainable=self.trainable)
        sequence_input = Input(shape=(self.maxlen,), dtype='int32', name='main_input')
        embedded_sequences = embedding_layer(sequence_input)
        # if not config:
        outputs, aux_input = self.model_function(embedded_sequences, n_out=self.n_out)
        # else:
        #     if isinstance(config, str):
        #         config = DNN.load_keras_model(config).get_config()
        #     config_layers = config['layers'][2:]
        #     x = embedded_sequences
        #     for layer in config_layers:
        #         x = keras.layers.deserialize(layer)(x)
        #     outputs, aux_input = x, None
        if aux_input is not None:
            inputs = [sequence_input, aux_input]
        else:
@@ -929,21 +940,22 @@ def RNN_aux_loss(x, no_rnn_layers=1, hidden_rnn=64, hidden_dense=32, rnn_func=No
    x = Dense(n_out, activation="sigmoid", name='main_output')(x)
    return [x, aux_dense], None

def RNN_dropout_conc(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=None, dropout=0.5, dropout_embed=0.5,n_out=6):
def RNN_dropout_conc(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=None, dropout=0.5, dropout_embed=0.5,n_out=6,mask=None):
    if rnn_func is None:
        rnn_func = CuDNNLSTM
    if not isinstance(hidden_rnn, list):
        hidden_rnn = [hidden_rnn] * no_rnn_layers
    if len(hidden_rnn) != no_rnn_layers:
        raise ValueError('list of recurrent units needs to be equal to no_rnn_layers')
    vals = []
    # x = entry_stop_gradients(x, mask)
    vals = [x]
    x = Dropout(dropout_embed, noise_shape=(None, 1, int(x.shape[-1])))(x)
    for i, rnn_size in enumerate(hidden_rnn):
        if i > 0:
            x = Dropout(dropout)(x)
        x = Bidirectional(rnn_func(int(rnn_size), return_sequences=True))(x)
        vals.append(x)
    x = concatenate([GlobalAveragePooling1D()(x)] + [GlobalMaxPool1D()(val) for val in vals] + [Lambda(lambda x : x[:,-1, :])(val) for val in vals])
    x = concatenate([GlobalAveragePooling1D()(x)] + [GlobalMaxPool1D()(val) for val in vals] + [Lambda(lambda x : x[:,-1, :])(val) for val in vals[1:]])
    x = Dropout(dropout)(x)
    # x = BatchNormalization(x)
    # x = Dense(int(hidden_dense), activation='relu')(x)
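A sketch of wiring RNN_dropout_conc onto a pre-embedded input, to show what the concatenation collects after this change (plain GRU is substituted for CuDNNLSTM so the sketch does not need a GPU; shapes are illustrative):

import keras
from keras.layers import Input
from keras.models import Model

embedded = Input(shape=(500, 300))   # stand-in for the embedding layer's output
out, _ = RNN_dropout_conc(embedded, no_rnn_layers=2, hidden_rnn=96, hidden_dense=None,
                          rnn_func=keras.layers.GRU, n_out=6)
model = Model(embedded, out)
# concatenated features: average-pool of the last RNN output, max-pools of the
# embeddings and of both RNN outputs, and the last time step of each RNN output
# (but, after this change, no longer the last time step of the raw embeddings)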
@@ -1032,32 +1044,22 @@ def RNN_general_one_class(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rn
    x = Dense(1, activation="sigmoid", name='main_output')(x)
    return x, None

def CNN_shallow(x, n_filters=100, kernel_sizes=[3,4,5], dropout=0.5,n_out=6):
def CNN_shallow(x, n_filters=64, kernel_sizes=[3,4,5], dropout_embed=0.5, dropout=0.5, n_out=6,act=None):
    outputs = []
    for kernel_size in kernel_sizes:
        output_i = Conv1D(n_filters, kernel_size=kernel_size,
                          activation='relu',
    x = SpatialDropout1D(dropout_embed)(x)
    if not isinstance(n_filters, list):
        n_filters = [n_filters] * len(kernel_sizes)
    for n_filter, kernel_size in zip(n_filters, kernel_sizes):
        output_i = Conv1D(n_filter, kernel_size=kernel_size,
                          activation=act,
                          padding='valid')(x)
        output_i = GlobalMaxPooling1D()(output_i)
        outputs.append(output_i)
        outputs.append(GlobalMaxPooling1D()(output_i))
    outputs.append(GlobalAveragePooling1D()(x))
    x = concatenate(outputs, axis=1)
    x = Dropout(rate=dropout)(x)
    x = Dense(n_out, activation="sigmoid", name='main_output')(x)
    return x, None

def CNN_shallow_1d(x, n_filters=100, kernel_sizes=[3,4,5], dropout=0.5,n_out=6):
    outputs = []
    for kernel_size in kernel_sizes:
        output_i = Conv1D(n_filters, kernel_size=kernel_size,
                          activation='relu',
                          padding='valid')(x)
        output_i = GlobalMaxPooling1D()(output_i)
        outputs.append(output_i)
    x = concatenate(outputs, axis=1)
    x = Dropout(rate=dropout)(x)
    x = Dense(1, activation="sigmoid", name='main_output')(x)
    return x, None

def roc_auc_score(y_true, y_pred):
    """ ROC AUC Score.
    Approximates the Area Under Curve score, using approximation based on
......
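Similarly, a sketch of the revised CNN_shallow head on a pre-embedded input, matching the shallow_CNN() configuration used in the training script (the direct functional-API wiring is illustrative; the real model is assembled by Embedding_Blanko_DNN):

from keras.layers import Input
from keras.models import Model

embedded = Input(shape=(500, 300))   # stand-in for the embedding layer's output
out, _ = CNN_shallow(embedded, n_filters=96, kernel_sizes=[3, 4, 5],
                     dropout=0.3, dropout_embed=0.3, act='relu')
model = Model(embedded, out)
# three max-pooled Conv1D branches plus an average-pool of the (spatially dropped-out)
# embeddings are concatenated and fed to a 6-unit sigmoid output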
@@ -111,11 +111,9 @@ def clean_comment(text, replace_misspellings=True):
    text = re.sub(r'[^\x00-\x7f]', r' ' , text)
    # text = re.sub(r'[\n\r]', r' ', text)
    s = re.sub(r"what's", "what is ", text, flags=re.IGNORECASE)
    s = re.sub(r"\'s", " ", s, flags=re.IGNORECASE)
    s = re.sub(r"\'ve", " have ", s, flags=re.IGNORECASE)
    s = re.sub(r"can't", "cannot ", s, flags=re.IGNORECASE)
    s = re.sub(r"won't", "will not ", s, flags=re.IGNORECASE)
    s = re.sub(r"n't", " not ", s, flags=re.IGNORECASE)
    s = re.sub(r"i'm", "i am ", s, flags=re.IGNORECASE)
    s = re.sub(r"\'re", " are ", s, flags=re.IGNORECASE)
    s = re.sub(r"\'d", " would ", s, flags=re.IGNORECASE)
......