Commit 964dfb3b authored by MorBoos

fix bug in preprocessing

parent 6bd0b6b8
......@@ -114,6 +114,7 @@ def clean_comment(text, replace_misspellings=True):
s = re.sub(r"\'s", " ", s, flags=re.IGNORECASE)
s = re.sub(r"\'ve", " have ", s, flags=re.IGNORECASE)
s = re.sub(r"can't", "cannot ", s, flags=re.IGNORECASE)
s = re.sub(r"won't", "will not ", s, flags=re.IGNORECASE)
s = re.sub(r"n't", " not ", s, flags=re.IGNORECASE)
s = re.sub(r"i'm", "i am ", s, flags=re.IGNORECASE)
s = re.sub(r"\'re", " are ", s, flags=re.IGNORECASE)
......@@ -152,7 +153,7 @@ def clean_comment(text, replace_misspellings=True):
@memory.cache
def data_preprocessing(df, replace_misspellings=True):
    """Clean the ``comment_text`` column of *df* and return *df*.

    Missing comments are replaced with a single space, then every comment is
    passed through ``clean_comment``.

    Args:
        df: DataFrame with a ``comment_text`` column. The column is
            overwritten on this object.
        replace_misspellings: Forwarded unchanged to ``clean_comment``.

    Returns:
        The same ``df`` object with ``comment_text`` cleaned.
    """
    # NOTE(review): ``memory`` is presumably a joblib ``Memory``; if so, a
    # cache hit skips this body and the caller's ``df`` is left untouched, so
    # callers should rely on the return value -- confirm.

    # Fill once and assign the result back. The chained form
    # ``df[col].fillna(..., inplace=True)`` acts on an intermediate Series and
    # is not guaranteed to update ``df``.
    comments = df['comment_text'].fillna(' ')
    clean_comment_dummy = partial(clean_comment, replace_misspellings=replace_misspellings)
    df['comment_text'] = comments.apply(clean_comment_dummy)
    return df
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment