doc2vec update

This commit is contained in:
annealias 2019-03-25 12:46:40 +01:00
parent ea0a132bd6
commit 86e34de8ab
1 changed file with 3 additions and 3 deletions

View File

@@ -18,7 +18,7 @@ import sklearn
from sklearn.model_selection import StratifiedKFold from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB from sklearn.naive_bayes import MultinomialNB
class MultinomialNaiveBayes: class MultinomialNaiveBayes_Word2Vec:
def make_mnb(dataset, sklearn_cv=True, percentile=100): def make_mnb(dataset, sklearn_cv=True, percentile=100):
'''fits naive bayes model with StratifiedKFold '''fits naive bayes model with StratifiedKFold
@@ -73,7 +73,7 @@ class MultinomialNaiveBayes:
all_data = read_corpus(X, tokens_only=False) all_data = read_corpus(X, tokens_only=False)
# instantiate a Doc2Vec object # instantiate a Doc2Vec object
doc2vec_model = Doc2Vec(training_data, vector_size=5, window=2, min_count=1, workers=4) doc2vec_model = Doc2Vec(training_data, vector_size=100, window=2, min_count=1, workers=4)
print(doc2vec_model.docvecs[0]) print(doc2vec_model.docvecs[0])
print(doc2vec_model.docvecs[1]) print(doc2vec_model.docvecs[1])
@@ -132,4 +132,4 @@ if __name__ == '__main__':
quotechar='\'') quotechar='\'')
# select only labeled articles # select only labeled articles
MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1][:100].reset_index(drop=True), sklearn_cv=False, percentile=100) MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1].reset_index(drop=True), sklearn_cv=False, percentile=100)