Commit c16dc699 authored by mjboos

dnn

parent c7235e90
@@ -8,17 +8,23 @@ import joblib
import pandas as pd, numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.multioutput import MultiOutputClassifier
+from keras import backend as K
+import keras
from sklearn.model_selection import cross_val_score
import helpers as hlp
import models
import preprocessing as pre
+from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, CSVLogger
import json
+import feature_engineering
+from functools import partial

memory = joblib.Memory(cachedir='/home/mboos/joblib')
best_weights_path="weights_base.best.hdf5"
checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
-early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
+early = EarlyStopping(monitor="val_loss", mode="min", patience=3)

def schedule(ind):
    a = [0.002,0.002,0.002,0.001,0.001]
    # clamp the index so epochs beyond the schedule reuse the last learning rate
    return a[min(ind, len(a)-1)]
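# schedule() maps the epoch index to a learning rate; it only takes effect if
# the `lr` LearningRateScheduler below is actually added to callbacks_list.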
@@ -26,30 +32,31 @@ lr = LearningRateScheduler(schedule)
callbacks_list = [checkpoint, early] #early
-fit_args = {'batch_size' : 256, 'epochs' : 15,
+fit_args = {'batch_size' : 256, 'epochs' : 20,
            'validation_split' : 0.2, 'callbacks' : callbacks_list}
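# fit_args holds the keyword arguments shared by every model.fit call below;
# fit_model() and continue_training_DNN() overwrite its 'callbacks' entry.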
train_text, train_y = pre.load_data()
test_text, _ = pre.load_data('test.csv')

-def train_DNN(model_name, **kwargs):
+def train_DNN(model_name, *args, **kwargs):
    best_weights_path="{}_best.hdf5".format(model_name)
    model = models.Embedding_Blanko_DNN(**kwargs)
    with open('../model_specs/{}.json'.format(model_name), 'w') as fl:
        json.dump(model.model.to_json(), fl)
-    model.fit(train_text, train_y, **fit_args)
+    model.fit(*args, **fit_args)
    model.model.load_weights(best_weights_path)
    return model
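# train_DNN dumps the architecture as JSON and, after fitting, reloads
# '{model_name}_best.hdf5' -- the checkpoint that fit_model() installs below.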
-def continue_training_DNN(model_name, **kwargs):
+def continue_training_DNN(model_name, *args, **kwargs):
    best_weights_path="{}_best.hdf5".format(model_name)
    model = models.Embedding_Blanko_DNN(**kwargs)
    model.model.load_weights(best_weights_path)
-    logger = CSVLogger('../logs/{}.csv'.format(model_name), separator=',', append=True)
+    logger = CSVLogger('../logs/{}_more.csv'.format(model_name), separator=',', append=True)
    best_weights_path="{}_more_best.hdf5".format(model_name)
    checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [logger, checkpoint, early] #early
    fit_args['callbacks'] = callbacks_list
-    model.fit(train_text, train_y, **fit_args)
+    model.fit(*args, **fit_args)
    return model
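# continue_training_DNN resumes from '{model_name}_best.hdf5' and checkpoints
# further progress to '{model_name}_more_best.hdf5', appending to a separate log.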
def continue_training_DNN_one_output(model_name, i, weights, **kwargs):
@@ -73,13 +80,13 @@ def predict_for_all(model):
    predictions = model.predict(test_text)
    hlp.write_model(predictions)
-def fit_model(model_name, **kwargs):
+def fit_model(model_name, *args, **kwargs):
    best_weights_path="{}_best.hdf5".format(model_name)
    logger = CSVLogger('../logs/{}.csv'.format(model_name), separator=',', append=False)
    checkpoint = ModelCheckpoint(best_weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [logger, checkpoint, early] #early
    fit_args['callbacks'] = callbacks_list
-    model = train_DNN(model_name, **kwargs)
+    model = train_DNN(model_name, *args, **kwargs)
    return model
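# fit_model wires up the CSV logger, checkpoint and early stopping for this
# model name, then delegates the actual training to train_DNN.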
def load_keras_model(model_name, **kwargs):
@@ -118,14 +125,21 @@ def fine_tune_model(model_name, old_model, **kwargs):
if __name__=='__main__':
+    model_func = partial(models.RNN_general, rnn_func=keras.layers.CuDNNLSTM)
    model_params = {
-        'max_features' : 500000, 'model_function' : models.LSTM_dropout_model, 'maxlen' : 500,
-        'embedding_dim' : 300,
-        'compilation_args' : {'optimizer' : 'adam', 'loss':'binary_crossentropy','metrics':['accuracy']}}
+        'max_features' : 500000, 'model_function' : model_func, 'maxlen' : 300,
+        'embedding_dim' : 300, 'trainable' : False,
+        'compilation_args' : {'optimizer' : optimizers.Adam(lr=0.001, beta_2=0.9), 'loss':{'main_output':'binary_crossentropy'}, 'metrics':['accuracy']}}
    frozen_tokenizer = pre.KerasPaddingTokenizer(max_features=model_params['max_features'], maxlen=model_params['maxlen'])
    frozen_tokenizer.fit(pd.concat([train_text, test_text]))
-    model_name = '300_fasttext_CC_LSTM'
+    model_name = '300_fasttext_cudnn_LSTM'
    embedding = hlp.get_fasttext_embedding('../crawl-300d-2M.vec')
-    model = fit_model(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
+    # aux_task = train_text.apply(feature_engineering.proportion_capitals)
+    # aux_task is commented out above, so only the main output is trained here
+    model = fit_model(model_name, {'main_input':train_text}, {'main_output':train_y}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
    hlp.write_model(model.predict(test_text))
+    K.clear_session()
+    # model_params['compilation_args']['optimizer'] = optimizers.Adam(lr=0.0005, beta_2=0.99)
+    # model = continue_training_DNN(model_name, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)
+    # hlp.write_model(model.predict(test_text))
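    # Sketch (not part of this commit): to actually train the auxiliary head,
    # swap in a model function with an 'aux_output' (e.g. models.LSTM_aux_output_model),
    # give the loss dict an entry for it (MSE is an assumption here), and pass the target:
    # model_params['model_function'] = models.LSTM_aux_output_model
    # model_params['compilation_args']['loss'] = {'main_output' : 'binary_crossentropy', 'aux_output' : 'mean_squared_error'}
    # aux_task = train_text.apply(feature_engineering.proportion_capitals)
    # model = fit_model(model_name, {'main_input' : train_text}, {'main_output' : train_y, 'aux_output' : aux_task}, embedding=embedding, tokenizer=frozen_tokenizer, **model_params)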
@@ -23,6 +23,7 @@ from keras.models import Model
from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras.layers import LSTM, Bidirectional, GlobalMaxPool1D, Dropout, BatchNormalization, MaxPooling1D
+from keras.layers import CuDNNLSTM, CuDNNGRU, GRU
from keras.preprocessing import text, sequence
from keras.callbacks import EarlyStopping, ModelCheckpoint
import keras.preprocessing.text
@@ -95,7 +96,7 @@ def tfidf_model(pre_args={'ngram_range' : (1,2), 'tokenizer' : None,
def keras_token_model(model_function=None, max_features=20000, maxlen=100, embed_size=128):
    if model_function is None:
        model_function = LSTM_dropout_model
-    inp = Input(shape=(maxlen, ))
+    inp = Input(shape=(maxlen, ), name='main_input')
    x = Embedding(max_features, embed_size)(inp)
    x = Bidirectional(LSTM(50, return_sequences=True))(x)
    x = GlobalMaxPool1D()(x)
@@ -257,8 +258,12 @@ class Embedding_Blanko_DNN(BaseEstimator):
                trainable=self.trainable, preprocess_embedding=self.preprocess_embedding, **self.embedding_args)
        sequence_input = Input(shape=(self.maxlen,), dtype='int32')
        embedded_sequences = embedding_layer(sequence_input)
-        x = self.model_function(embedded_sequences)
-        self.model = Model(inputs=sequence_input, outputs=x)
+        outputs, aux_input = self.model_function(embedded_sequences)
+        # tensors cannot be truth-tested, so compare against None explicitly
+        if aux_input is not None:
+            inputs = [sequence_input, aux_input]
+        else:
+            inputs = sequence_input
+        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(**self.compilation_args)

    def fit(self, X, y, **kwargs):
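        # y may be a dict keyed by output layer name (e.g. {'main_output': labels}),
        # which Keras resolves against the named outputs introduced in this commit;
        # X is assumed to be raw text that the tokenizer can transform.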
@@ -268,10 +273,14 @@ class Embedding_Blanko_DNN(BaseEstimator):
        embedding_matrix = make_embedding_matrix(self.embedding, word_index, max_features=self.max_features, maxlen=self.maxlen, embedding_dim=self.embedding_dim, correct_spelling=self.correct_spelling)
        embedding_matrix, self.tokenizer.tokenizer = add_oov_vector_and_prune(embedding_matrix, self.tokenizer.tokenizer)
        embedding_layer = make_embedding_layer(embedding_matrix, maxlen=self.maxlen, trainable=self.trainable, preprocess_embedding=self.preprocess_embedding, **self.embedding_args)
-        sequence_input = Input(shape=(self.maxlen,), dtype='int32')
+        sequence_input = Input(shape=(self.maxlen,), dtype='int32', name='main_input')
        embedded_sequences = embedding_layer(sequence_input)
-        x = self.model_function(embedded_sequences)
-        self.model = Model(inputs=sequence_input, outputs=x)
+        outputs, aux_input = self.model_function(embedded_sequences)
+        if aux_input is not None:
+            inputs = [sequence_input, aux_input]
+        else:
+            inputs = sequence_input
+        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(**self.compilation_args)
        X_t = self.tokenizer.transform(X)
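        # NOTE: fit() rebuilds and recompiles the graph above so the embedding
        # matrix matches the (possibly pruned) tokenizer vocabulary before training.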
@@ -331,6 +340,31 @@ def LSTM_twice_dropout_model(x):
    x = Dense(6, activation="sigmoid")(x)
    return x

+def RNN_general(x, no_rnn_layers=1, hidden_rnn=64, hidden_dense=32, rnn_func=None, dropout=0.5):
+    if rnn_func is None:
+        rnn_func = LSTM
+    if not isinstance(hidden_rnn, list):
+        hidden_rnn = [hidden_rnn] * no_rnn_layers
+    if len(hidden_rnn) != no_rnn_layers:
+        raise ValueError('hidden_rnn must have exactly no_rnn_layers entries')
+    # CuDNN layers do not take a dropout kwarg, so only pass it to plain RNNs
+    rnn_args = {} if rnn_func in (CuDNNLSTM, CuDNNGRU) else {'dropout' : dropout}
+    for rnn_size in hidden_rnn:
+        x = Bidirectional(rnn_func(rnn_size, return_sequences=True, **rnn_args))(x)
+    x = GlobalMaxPool1D()(x)
+    x = Dropout(dropout)(x)
+    x = Dense(hidden_dense, activation='relu')(x)
+    x = Dropout(dropout)(x)
+    x = Dense(6, activation="sigmoid", name='main_output')(x)
+    # return (outputs, aux_input) like the other model functions below
+    return x, None
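# RNN_general is meant to be specialized via functools.partial, e.g.
# partial(RNN_general, rnn_func=CuDNNLSTM) as in the training script's __main__;
# it returns (output_tensor, aux_input) to match Embedding_Blanko_DNN's contract.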
+def LSTM_CUDA_dropout_model(x):
+    # CuDNNLSTM has no dropout argument; regularization comes from the Dropout layers
+    x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
+    x = GlobalMaxPool1D()(x)
+    x = Dropout(0.5)(x)
+    x = Dense(32, activation="relu")(x)
+    x = Dropout(0.5)(x)
+    x = Dense(6, activation="sigmoid")(x)
+    return x, None
def LSTM_dropout_model(x):
    x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.5))(x)
    x = GlobalMaxPool1D()(x)
@@ -338,7 +372,7 @@ def LSTM_dropout_model(x):
    x = Dense(32, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(6, activation="sigmoid")(x)
-    return x
+    return x, None

def LSTM_one_class(x, model_func=None):
    if model_func is None:
@@ -352,3 +386,14 @@ def LSTM_one_class(x, model_func=None):
    x = Dense(1, activation="sigmoid")(x)
    return x

+def LSTM_aux_output_model(x):
+    x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.5))(x)
+    x = GlobalMaxPool1D()(x)
+    x = Dropout(0.5)(x)
+    dense_layer = Dense(32, activation="relu")(x)
+    x = Dropout(0.5)(dense_layer)
+    # name both heads so dict-style losses and targets can address them
+    output = Dense(6, activation="sigmoid", name='main_output')(x)
+    other_output = Dense(1, name='aux_output')(dense_layer)
+    return [output, other_output], None
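# LSTM_aux_output_model shares the penultimate dense layer between the six-unit
# sigmoid 'main_output' and a linear Dense(1) 'aux_output' head, suitable for
# regression-style auxiliary targets such as proportion_capitals.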