From 86e34de8abe7f77fd3a4f7c1013d2d9fcc1e7100 Mon Sep 17 00:00:00 2001 From: annealias Date: Mon, 25 Mar 2019 12:46:40 +0100 Subject: [PATCH] doc2vec update --- src/MultinomialNaiveBayes_Word2Vec.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/MultinomialNaiveBayes_Word2Vec.py b/src/MultinomialNaiveBayes_Word2Vec.py index 80d8a31..95c56a9 100644 --- a/src/MultinomialNaiveBayes_Word2Vec.py +++ b/src/MultinomialNaiveBayes_Word2Vec.py @@ -18,7 +18,7 @@ import sklearn from sklearn.model_selection import StratifiedKFold from sklearn.naive_bayes import MultinomialNB -class MultinomialNaiveBayes: +class MultinomialNaiveBayes_Word2Vec: def make_mnb(dataset, sklearn_cv=True, percentile=100): '''fits naive bayes model with StratifiedKFold @@ -73,7 +73,7 @@ class MultinomialNaiveBayes: all_data = read_corpus(X, tokens_only=False) # instantiate a Doc2Vec object - doc2vec_model = Doc2Vec(training_data, vector_size=5, window=2, min_count=1, workers=4) + doc2vec_model = Doc2Vec(training_data, vector_size=100, window=2, min_count=1, workers=4) print(doc2vec_model.docvecs[0]) print(doc2vec_model.docvecs[1]) @@ -132,4 +132,4 @@ if __name__ == '__main__': quotechar='\'') # select only labeled articles - MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1][:100].reset_index(drop=True), sklearn_cv=False, percentile=100) \ No newline at end of file + MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1].reset_index(drop=True), sklearn_cv=False, percentile=100) \ No newline at end of file