doc2vec update
This commit is contained in:
parent
ea0a132bd6
commit
86e34de8ab
|
@ -18,7 +18,7 @@ import sklearn
|
||||||
from sklearn.model_selection import StratifiedKFold
|
from sklearn.model_selection import StratifiedKFold
|
||||||
from sklearn.naive_bayes import MultinomialNB
|
from sklearn.naive_bayes import MultinomialNB
|
||||||
|
|
||||||
class MultinomialNaiveBayes:
|
class MultinomialNaiveBayes_Word2Vec:
|
||||||
|
|
||||||
def make_mnb(dataset, sklearn_cv=True, percentile=100):
|
def make_mnb(dataset, sklearn_cv=True, percentile=100):
|
||||||
'''fits naive bayes model with StratifiedKFold
|
'''fits naive bayes model with StratifiedKFold
|
||||||
|
@ -73,7 +73,7 @@ class MultinomialNaiveBayes:
|
||||||
all_data = read_corpus(X, tokens_only=False)
|
all_data = read_corpus(X, tokens_only=False)
|
||||||
|
|
||||||
# instantiate a Doc2Vec object
|
# instantiate a Doc2Vec object
|
||||||
doc2vec_model = Doc2Vec(training_data, vector_size=5, window=2, min_count=1, workers=4)
|
doc2vec_model = Doc2Vec(training_data, vector_size=100, window=2, min_count=1, workers=4)
|
||||||
|
|
||||||
print(doc2vec_model.docvecs[0])
|
print(doc2vec_model.docvecs[0])
|
||||||
print(doc2vec_model.docvecs[1])
|
print(doc2vec_model.docvecs[1])
|
||||||
|
@ -132,4 +132,4 @@ if __name__ == '__main__':
|
||||||
quotechar='\'')
|
quotechar='\'')
|
||||||
|
|
||||||
# select only labeled articles
|
# select only labeled articles
|
||||||
MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1][:100].reset_index(drop=True), sklearn_cv=False, percentile=100)
|
MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1].reset_index(drop=True), sklearn_cv=False, percentile=100)
|
Loading…
Reference in New Issue