evaluation interactive labeling: update
This commit is contained in:
parent
a2c7a7279e
commit
b2aa2df0f7
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -56,7 +56,7 @@ class LabelingPlotter():
|
|||
|
||||
def plot_cumulative():
|
||||
# load pickle object
|
||||
with open('../obj/array_class_probs_round_11.pkl', 'rb') as input:
|
||||
with open('../obj/array_class_probs_stratified_round_9.pkl', 'rb') as input:
|
||||
list = pickle.load(input)
|
||||
|
||||
# sort list in descending order
|
||||
|
@ -86,11 +86,12 @@ class LabelingPlotter():
|
|||
ax.set_xlabel('Highest estimated probability')
|
||||
ax.set_ylabel('Fraction of articles with this highest estimated probability')
|
||||
#plt.axis([0.5, 0.99, 0, 0.006]) #round 9
|
||||
plt.axis([0.5, 1, 0, 0.015]) # round 9 stratified
|
||||
#plt.axis([0.65, 1, 0, 0.003]) # round 10
|
||||
plt.axis([0.7, 1, 0, 0.002]) # round 11
|
||||
#plt.axis([0.7, 1, 0, 0.002]) # round 11
|
||||
#ax.set_xbound(lower=0.5, upper=0.99)
|
||||
plt.savefig('..\\visualization\\proba_round_11.png')
|
||||
plt.savefig('..\\visualization\\proba_round_11.eps')
|
||||
plt.savefig('..\\visualization\\proba_stratified_round_9.png')
|
||||
plt.savefig('..\\visualization\\proba_stratified_round_9.eps')
|
||||
|
||||
plt.show()
|
||||
|
||||
|
|
|
@ -25,7 +25,11 @@ class MultinomialNaiveBayes:
|
|||
# split data into text and label set
|
||||
# join title and text
|
||||
X = dataset['Title'] + '. ' + dataset['Text']
|
||||
|
||||
print(X[:12])
|
||||
|
||||
y = dataset['Label']
|
||||
print(y[:12])
|
||||
|
||||
if sklearn_cv:
|
||||
cv = CountVectorizer()
|
||||
|
@ -57,6 +61,12 @@ class MultinomialNaiveBayes:
|
|||
if sklearn_cv:
|
||||
# use sklearn CountVectorizer
|
||||
# fit the training data and then return the matrix
|
||||
print('Title + Text von train')
|
||||
print(X[train])
|
||||
|
||||
print('Label von train')
|
||||
print(y[train])
|
||||
|
||||
training_data = cv.fit_transform(X[train], y[train]).toarray()
|
||||
# transform testing data and return the matrix
|
||||
testing_data = cv.transform(X[test]).toarray()
|
||||
|
@ -173,3 +183,23 @@ class MultinomialNaiveBayes:
|
|||
print()
|
||||
#print metrics
|
||||
print('F1 score: ', format(f1_score(y_test, predictions)))
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# read csv file
|
||||
print('# reading dataset')
|
||||
print('# ...')
|
||||
|
||||
# read current data set from csv
|
||||
df = pd.read_csv('../data/interactive_labeling_round_11.csv',
|
||||
sep='|',
|
||||
usecols=range(1,13), # drop first column 'unnamed'
|
||||
encoding='utf-8',
|
||||
quoting=csv.QUOTE_NONNUMERIC,
|
||||
quotechar='\'')
|
||||
|
||||
# select only labeled articles
|
||||
#print('Anzahl aller gelabelten:')
|
||||
#print(len(df.loc[df['Label'] != -1]))
|
||||
#print(df.loc[df['Label'] != -1][:5])
|
||||
MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1].reindex(), sklearn_cv=True, percentile=100)
|
|
@ -1,7 +1,7 @@
|
|||
%!PS-Adobe-3.0 EPSF-3.0
|
||||
%%Title: ..\visualization\proba_round_11.eps
|
||||
%%Creator: matplotlib version 3.0.2, http://matplotlib.org/
|
||||
%%CreationDate: Thu Feb 21 14:11:04 2019
|
||||
%%CreationDate: Tue Mar 5 08:51:48 2019
|
||||
%%Orientation: portrait
|
||||
%%BoundingBox: 18 252 594 540
|
||||
%%EndComments
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
Loading…
Reference in New Issue