update labeling

This commit is contained in:
Anne Lorenz 2019-01-15 10:37:07 +01:00
parent 035583584f
commit 6471a81196
3 changed files with 25576 additions and 773 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large. (Load Diff)

View File

@@ -32,6 +32,7 @@ class MNBInteractive:
# split unlabeled data into text and label set # split unlabeled data into text and label set
# join title and text # join title and text
U = unlabeled_data['Title'] + '. ' + unlabeled_data['Text'] U = unlabeled_data['Title'] + '. ' + unlabeled_data['Text']
l = unlabeled_data['Label']
if sklearn_cv: if sklearn_cv:
cv = CountVectorizer() cv = CountVectorizer()
@@ -87,13 +88,11 @@ class MNBInteractive:
# number of samples encountered for each class during fitting # number of samples encountered for each class during fitting
# this value is weighted by the sample weight when provided # this value is weighted by the sample weight when provided
# class_count = classifier.class_count_ class_count = classifier.class_count_
# classes in order used # classes in order used
classes = classifier.classes_ classes = classifier.classes_
class_count = classifier.class_count_
print('# MNB: ending multinomial naive bayes') print('# MNB: ending multinomial naive bayes')
# return classes and vector of class estimates # return classes and vector of class estimates