Commit 1a5644ce authored by mjboos

stuff

parent 3bb1f34b
@@ -48,26 +48,20 @@ def make_callback_list(model_name, save_weights=True, patience=5):
checkpoints.append(checkpoint)
return checkpoints
def continue_training_DNN(model_name, *args, **kwargs):
def continue_training_DNN(model_name, fit_args, *args, **kwargs):
best_weights_path="{}_best.hdf5".format(model_name)
model = models.Embedding_Blanko_DNN(**kwargs)
model.model.load_weights(best_weights_path)
logger = CSVLogger('../logs/{}_more.csv'.format(model_name), separator=',', append=True)
best_weights_path="{}_more_best.hdf5".format(model_name)
early = EarlyStopping(monitor="val_loss", mode="min", patience=10)
checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [logger, checkpoint, early] #early
callbacks_list = make_callback_list(model_name+'_more', patience=5)
fit_args['callbacks'] = callbacks_list
model.fit(*args, **fit_args)
return model
def continue_training_DNN_one_output(model_name, i, weights, *args, **kwargs):
def continue_training_DNN_one_output(model_name, i, weights, fit_args, *args, **kwargs):
best_weights_path="{}_best.hdf5".format(model_name)
model = models.Embedding_Blanko_DNN(**kwargs)
transfer_weights_multi_to_one(weights, model.model, i)
logger = CSVLogger('../logs/{}.csv'.format(model_name), separator=',', append=False)
checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [logger, checkpoint, early] #early
callbacks_list = make_callback_list(model_name, patience=5)
fit_args['callbacks'] = callbacks_list
model.fit(*args, **fit_args)
model.model.load_weights(best_weights_path)
@@ -82,6 +76,10 @@ def predict_for_all(model):
predictions = model.predict(test_text)
hlp.write_model(predictions)
def conc_finetuned_preds(model_name):
predictions = np.concatenate([joblib.load('{}_{}.pkl'.format(model_name,i))[:,None] for i in xrange(6)], axis=1)
hlp.write_model(predictions)
def fit_model(model_name, fit_args, *args, **kwargs):
fit_args['callbacks'] = make_callback_list(model_name)
model = train_DNN(model_name, fit_args, *args, **kwargs)
@@ -112,34 +110,11 @@ def transfer_weights_multi_to_one(weights, model, i):
# last layer: take column i of the 6-way output kernel and entry i of its bias for the single-output head
model.layers[-1].set_weights([weights[-1][0][:,i][:,None], weights[-1][1][i][None]])
def fine_tune_model(model_name, old_model, train_X, train_y, **kwargs):
def fine_tune_model(model_name, old_model, fit_args, train_X, train_y, **kwargs):
'''Fine-tunes and predicts with one single-output model per label (6 in total), transferring weights from old_model'''
weights = [layer.get_weights() for layer in old_model.layers]
for i in xrange(6):
new_name = model_name + '_{}'.format(i)
predict_for_one_category(new_name,
continue_training_DNN_one_output(new_name, i, weights, train_X, train_y[:,i], **kwargs))
if __name__=='__main__':
model_func = partial(models.RNN_general, rnn_func=keras.layers.CuDNNLSTM, no_rnn_layers=1)
aux_task = train_y.sum(axis=1) > 0
class_weights = hlp.get_class_weights(train_y)
weight_tensor = tf.convert_to_tensor(class_weights, dtype=tf.float32)
loss = partial(models.weighted_binary_crossentropy, weights=weight_tensor)
loss.__name__ = 'weighted_binary_crossentropy'
model_params = {
'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 300,
'embedding_dim' : 300, 'trainable' : False,
'compilation_args' : {'optimizer' : optimizers.Adam(lr=0.001, beta_2=0.99), 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
frozen_tokenizer = pre.KerasPaddingTokenizer(max_features=model_params['max_features'], maxlen=model_params['maxlen'])
frozen_tokenizer.fit(pd.concat([train_text, test_text]))
model_name = '300_fasttext_cuda_just_that_LSTM'
embedding = hlp.get_fasttext_embedding('../crawl-300d-2M.vec')
# model = load_full_model(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
model = fit_model(model_name, {'main_input':train_text}, {'main_output':train_y}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
hlp.write_model(model.predict(test_text))
# K.clear_session()
# model_params['compilation_args']['optimizer'] = optimizers.Adam(lr=0.0005, beta_2=0.99)
# model = continue_training_DNN(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
# hlp.write_model(model.predict(test_text))
model = continue_training_DNN_one_output(new_name, i, weights, fit_args, train_X, train_y[:,i], **kwargs)
joblib.dump(model.predict(test_text), '{}.pkl'.format(new_name))
@@ -35,7 +35,7 @@ lr = LearningRateScheduler(schedule)
callbacks_list = [checkpoint, early] #early
fit_args = {'batch_size' : 128, 'epochs' : 30,
fit_args = {'batch_size' : 100, 'epochs' : 30,
'validation_split' : 0.2, 'callbacks' : callbacks_list}
train_text, train_y = pre.load_data()
@@ -49,8 +49,16 @@ def aux_net():
'compilation_args' : {'optimizer' : optimizers.Adam(lr=0.001, beta_2=0.99), 'loss':{'main_output': 'binary_crossentropy', 'aux_output' : 'binary_crossentropy'}, 'loss_weights' : [1., 0.1]}}
return model_params
def simple_one_output_net():
model_func = partial(models.RNN_general_one_class, rnn_func=keras.layers.CuDNNGRU, no_rnn_layers=2, hidden_rnn=96, hidden_dense=48)
model_params = {
'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 500,
'embedding_dim' : 300, 'trainable' : False,
'compilation_args' : {'optimizer' : optimizers.Adam(lr=0.001, beta_2=0.99, clipvalue=1., clipnorm=1.), 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
return model_params
def simple_net():
model_func = partial(models.RNN_general, rnn_func=keras.layers.CuDNNLSTM, no_rnn_layers=2, hidden_rnn=64, hidden_dense=48)
model_func = partial(models.RNN_general, rnn_func=keras.layers.CuDNNGRU, no_rnn_layers=2, hidden_rnn=96, hidden_dense=48)
model_params = {
'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 500,
'embedding_dim' : 300, 'trainable' : False,
@@ -58,9 +66,9 @@ def simple_net():
return model_params
def add_net():
model_func = partial(models.RNN_general, rnn_func=keras.layers.CuDNNLSTM, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48)
model_func = partial(models.RNN_general, rnn_func=keras.layers.CuDNNGRU, no_rnn_layers=2, hidden_rnn=96, hidden_dense=48)
model_params = {
'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 400,
'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 500,
'embedding_dim' : 400, 'trainable' : False,
'compilation_args' : {'optimizer' : optimizers.Adam(lr=0.001, beta_2=0.99, clipvalue=1., clipnorm=1.), 'loss':{'main_output': 'binary_crossentropy'}, 'loss_weights' : [1.]}}
return model_params
@@ -71,22 +79,24 @@ if __name__=='__main__':
weight_tensor = tf.convert_to_tensor(class_weights, dtype=tf.float32)
loss = partial(models.weighted_binary_crossentropy, weights=weight_tensor)
loss.__name__ = 'weighted_binary_crossentropy'
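# Keras looks up a loss function's __name__ (e.g. for metric naming and serialization); a
# functools.partial object has none, hence it is set explicitly above.
# Sketch of what models.weighted_binary_crossentropy presumably looks like (it is not shown in
# this diff, so this is an assumption for illustration only):
#   def weighted_binary_crossentropy(y_true, y_pred, weights):
#       bce = K.binary_crossentropy(y_true, y_pred)   # element-wise BCE, shape (batch, 6)
#       return K.mean(bce * weights, axis=-1)         # scale each label's term by its class weight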
model_params = simple_net()
model_name = '300_fasttext_cuda_2_layers_LSTM'
model_params = simple_one_output_net()
model_name = '300_fasttext_cuda_2_layers_larger_GRU'
frozen_tokenizer = pre.KerasPaddingTokenizer(max_features=model_params['max_features'], maxlen=model_params['maxlen'])
frozen_tokenizer.fit(pd.concat([train_text, test_text]))
embedding = hlp.get_fasttext_embedding('../crawl-300d-2M.vec')
# embedding = hlp.join_embedding_vec(embedding, '../crawl-300d-2M.vec')
# model = load_full_model(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
# SHUFFLE TRAINING SET so validation split is different every time
row_idx = np.arange(0, train_text.shape[0])
np.random.shuffle(row_idx)
train_text, train_y, aux_task = train_text[row_idx], train_y[row_idx], aux_task[row_idx]
model = fit_model(model_name, fit_args, {'main_input':train_text}, {'main_output':train_y}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
keras_model = load_keras_model(model_name)
# model = fit_model(model_name, fit_args, {'main_input':train_text}, {'main_output':train_y}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
# model = continue_training_DNN(model_name, fit_args, train_text, train_y, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
# model = load_full_model(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
# model = continue_training_DNN(model_name, {'main_input':train_text}, {'main_output':train_y}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
hlp.write_model(model.predict(test_text))
K.clear_session()
fine_tune_model(model_name, keras_model, fit_args, train_text, train_y, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
conc_finetuned_preds(model_name)
# hlp.write_model(model.predict(test_text))
# K.clear_session()
# model_params['compilation_args']['optimizer'] = optimizers.Adam(lr=0.0005, beta_2=0.99)
# model = continue_training_DNN(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
# hlp.write_model(model.predict(test_text))
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division
import sys
from os.path import dirname
from keras import initializers
from keras.engine import InputSpec, Layer
from keras import backend as K
class AttentionWeightedAverage(Layer):
"""
Computes a weighted average of the different channels across timesteps.
Uses one parameter per channel to compute the attention value for each timestep.
"""
def __init__(self, return_attention=False, **kwargs):
self.init = initializers.get('uniform')
self.supports_masking = True
self.return_attention = return_attention
super(AttentionWeightedAverage, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(ndim=3)]
assert len(input_shape) == 3
self.W = self.add_weight(shape=(input_shape[2], 1),
name='{}_W'.format(self.name),
initializer=self.init)
self.trainable_weights = [self.W]
super(AttentionWeightedAverage, self).build(input_shape)
def call(self, x, mask=None):
# computes a probability distribution over the timesteps
# uses 'max trick' for numerical stability
# reshape is done to avoid issue with Tensorflow
# and 1-dimensional weights
logits = K.dot(x, self.W)
x_shape = K.shape(x)
logits = K.reshape(logits, (x_shape[0], x_shape[1]))
ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))
# masked timesteps have zero weight
if mask is not None:
mask = K.cast(mask, K.floatx())
ai = ai * mask
att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
weighted_input = x * K.expand_dims(att_weights)
result = K.sum(weighted_input, axis=1)
if self.return_attention:
return [result, att_weights]
return result
def get_output_shape_for(self, input_shape):
return self.compute_output_shape(input_shape)
def compute_output_shape(self, input_shape):
output_len = input_shape[2]
if self.return_attention:
return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
return (input_shape[0], output_len)
def compute_mask(self, input, input_mask=None):
if isinstance(input_mask, list):
return [None] * len(input_mask)
else:
return None
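# Minimal usage sketch (an assumption for illustration; `embedded` stands for any 3-D
# (batch, timesteps, channels) tensor, e.g. the output of a Bidirectional RNN):
#   seq = Bidirectional(CuDNNGRU(48, return_sequences=True))(embedded)
#   avg = AttentionWeightedAverage(name='attlayer')(seq)              # -> (batch, 2*48)
#   avg, att = AttentionWeightedAverage(return_attention=True)(seq)   # plus (batch, timesteps) weights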
@@ -4,6 +4,7 @@ from __future__ import division
import numpy as np
import pandas as pd
import joblib
from attlayer import AttentionWeightedAverage
import pandas as pd, numpy as np
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import cross_val_score
@@ -24,6 +25,7 @@ from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras import optimizers
from keras.layers import LSTM, Bidirectional, GlobalMaxPool1D, Dropout, BatchNormalization, MaxPooling1D
from keras.layers.merge import concatenate
from keras.layers import CuDNNLSTM, CuDNNGRU, GRU
from keras.preprocessing import text, sequence
from keras.callbacks import EarlyStopping, ModelCheckpoint
@@ -362,6 +364,29 @@ def LSTM_twice_dropout_model(x):
x = Dense(6, activation="sigmoid")(x)
return x
#WORK IN PROGRESS!!!
def RNN_aux_loss_skip(x, no_rnn_layers=2, hidden_rnn=64, hidden_dense=48, rnn_func=None, dropout=0.5, aux_dim=1):
if rnn_func is None:
rnn_func = CuDNNGRU
if not isinstance(hidden_rnn, list):
hidden_rnn = [hidden_rnn] * no_rnn_layers
if len(hidden_rnn) != no_rnn_layers:
raise ValueError('list of recurrent units needs to be equal to no_rnn_layers')
act_list = [x]
for rnn_size in hidden_rnn:
x = Dropout(dropout)(x)
x = Bidirectional(rnn_func(rnn_size, return_sequences=True))(x)
act_list.append(x)
conc_act = concatenate(act_list)  # skip connections: concatenate the embedding input with every RNN layer's activations
aux_dense = Dense(aux_dim, activation='sigmoid', name='aux_output')(conc_act)
x = GlobalMaxPool1D()(x)
x = Dropout(dropout)(x)
x = Dense(hidden_dense, activation='relu')(x)
x = Dropout(dropout)(x)
x = Dense(6, activation="sigmoid", name='main_output')(x)
return [x, aux_dense], None
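# (the WIP architecture above: the auxiliary sigmoid head reads the skip-concatenated
# activations of all RNN layers, while the 6-label main head sits on the max-pooled top layer)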
def RNN_aux_loss(x, no_rnn_layers=1, hidden_rnn=64, hidden_dense=32, rnn_func=None, dropout=0.5, aux_dim=1):
if rnn_func is None:
rnn_func = LSTM
@@ -380,6 +405,49 @@ def RNN_aux_loss(x, no_rnn_layers=1, hidden_rnn=64, hidden_dense=32, rnn_func=No
x = Dense(6, activation="sigmoid", name='main_output')(x)
return [x, aux_dense], None
def RNN_general_att(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=None, dropout=0.5):
if rnn_func is None:
rnn_func = CuDNNLSTM
if not isinstance(hidden_rnn, list):
hidden_rnn = [hidden_rnn] * no_rnn_layers
if len(hidden_rnn) != no_rnn_layers:
raise ValueError('list of recurrent units needs to be equal to no_rnn_layers')
vals = [x]
for rnn_size in hidden_rnn:
x = Dropout(dropout)(x)
x = Bidirectional(rnn_func(int(rnn_size), return_sequences=True))(x)
vals.append(x)
conc = concatenate(vals)
x = AttentionWeightedAverage(name='attlayer')(conc)
# x = Dropout(dropout)(x)
# x = BatchNormalization(x)
# x = Dense(int(hidden_dense), activation='relu')(x)
x = Dropout(dropout)(x)
x = Dense(6, activation="sigmoid", name='main_output')(x)
return x, None
def RNN_general_skip(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=None, dropout=0.5):
if rnn_func is None:
rnn_func = CuDNNLSTM
if not isinstance(hidden_rnn, list):
hidden_rnn = [hidden_rnn] * no_rnn_layers
if len(hidden_rnn) != no_rnn_layers:
raise ValueError('list of recurrent units needs to be equal to no_rnn_layers')
vals = [x]
for rnn_size in hidden_rnn:
x = Dropout(dropout)(x)
x = Bidirectional(rnn_func(int(rnn_size), return_sequences=True))(x)
vals.append(x)
conc = concatenate(vals)
x = GlobalMaxPool1D()(conc)
x = Dropout(dropout)(x)
# x = BatchNormalization(x)
x = Dense(int(hidden_dense), activation='relu')(x)
x = Dropout(dropout)(x)
x = Dense(6, activation="sigmoid", name='main_output')(x)
return x, None
def RNN_general(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=None, dropout=0.5):
if rnn_func is None:
rnn_func = CuDNNLSTM
@@ -398,6 +466,27 @@ def RNN_general(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=Non
x = Dense(6, activation="sigmoid", name='main_output')(x)
return x, None
def RNN_general_one_class(x, no_rnn_layers=2, hidden_rnn=48, hidden_dense=48, rnn_func=None, dropout=0.5):
if rnn_func is None:
rnn_func = CuDNNLSTM
if not isinstance(hidden_rnn, list):
hidden_rnn = [hidden_rnn] * no_rnn_layers
if len(hidden_rnn) != no_rnn_layers:
raise ValueError('list of recurrent units needs to be equal to no_rnn_layers')
for rnn_size in hidden_rnn:
x = Dropout(dropout)(x)
x = Bidirectional(rnn_func(int(rnn_size), return_sequences=True))(x)
x = GlobalMaxPool1D()(x)
x = Dropout(dropout)(x)
# x = BatchNormalization(x)
x = Dense(int(hidden_dense), activation='relu')(x)
x = Dropout(dropout)(x)
x = Dense(1, activation="sigmoid", name='main_output')(x)
return x, None
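# RNN_general_one_class is the single-sigmoid counterpart of RNN_general; it is the architecture
# returned by simple_one_output_net() and used via fine_tune_model()/continue_training_DNN_one_output()
# to train one model per label after transferring the shared weights.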
def CNN_shallow():
pass
def LSTM_CUDA_dropout_model(x):
x = Bidirectional(CuDNNLSTM(64, return_sequences=True, dropout=0.5))(x)
x = GlobalMaxPool1D()(x)
@@ -52,7 +52,7 @@ def clean_comment(text):
@memory.cache
def data_preprocessing(df):
df['comment_text'].fillna('', inplace=True)
df['comment_text'].fillna(' ', inplace=True)
df['comment_text'] = df['comment_text'].apply(clean_comment)
return df