evaluation interactive labeling: update
This commit is contained in:
parent
a2c7a7279e
commit
b2aa2df0f7
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -56,7 +56,7 @@ class LabelingPlotter():
|
||||||
|
|
||||||
def plot_cumulative():
|
def plot_cumulative():
|
||||||
# load pickle object
|
# load pickle object
|
||||||
with open('../obj/array_class_probs_round_11.pkl', 'rb') as input:
|
with open('../obj/array_class_probs_stratified_round_9.pkl', 'rb') as input:
|
||||||
list = pickle.load(input)
|
list = pickle.load(input)
|
||||||
|
|
||||||
# sort list in descending order
|
# sort list in descending order
|
||||||
|
@ -86,11 +86,12 @@ class LabelingPlotter():
|
||||||
ax.set_xlabel('Highest estimated probability')
|
ax.set_xlabel('Highest estimated probability')
|
||||||
ax.set_ylabel('Fraction of articles with this highest estimated probability')
|
ax.set_ylabel('Fraction of articles with this highest estimated probability')
|
||||||
#plt.axis([0.5, 0.99, 0, 0.006]) #round 9
|
#plt.axis([0.5, 0.99, 0, 0.006]) #round 9
|
||||||
|
plt.axis([0.5, 1, 0, 0.015]) # round 9 stratified
|
||||||
#plt.axis([0.65, 1, 0, 0.003]) # round 10
|
#plt.axis([0.65, 1, 0, 0.003]) # round 10
|
||||||
plt.axis([0.7, 1, 0, 0.002]) # round 11
|
#plt.axis([0.7, 1, 0, 0.002]) # round 11
|
||||||
#ax.set_xbound(lower=0.5, upper=0.99)
|
#ax.set_xbound(lower=0.5, upper=0.99)
|
||||||
plt.savefig('..\\visualization\\proba_round_11.png')
|
plt.savefig('..\\visualization\\proba_stratified_round_9.png')
|
||||||
plt.savefig('..\\visualization\\proba_round_11.eps')
|
plt.savefig('..\\visualization\\proba_stratified_round_9.eps')
|
||||||
|
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,11 @@ class MultinomialNaiveBayes:
|
||||||
# split data into text and label set
|
# split data into text and label set
|
||||||
# join title and text
|
# join title and text
|
||||||
X = dataset['Title'] + '. ' + dataset['Text']
|
X = dataset['Title'] + '. ' + dataset['Text']
|
||||||
|
|
||||||
|
print(X[:12])
|
||||||
|
|
||||||
y = dataset['Label']
|
y = dataset['Label']
|
||||||
|
print(y[:12])
|
||||||
|
|
||||||
if sklearn_cv:
|
if sklearn_cv:
|
||||||
cv = CountVectorizer()
|
cv = CountVectorizer()
|
||||||
|
@ -57,6 +61,12 @@ class MultinomialNaiveBayes:
|
||||||
if sklearn_cv:
|
if sklearn_cv:
|
||||||
# use sklearn CountVectorizer
|
# use sklearn CountVectorizer
|
||||||
# fit the training data and then return the matrix
|
# fit the training data and then return the matrix
|
||||||
|
print('Title + Text von train')
|
||||||
|
print(X[train])
|
||||||
|
|
||||||
|
print('Label von train')
|
||||||
|
print(y[train])
|
||||||
|
|
||||||
training_data = cv.fit_transform(X[train], y[train]).toarray()
|
training_data = cv.fit_transform(X[train], y[train]).toarray()
|
||||||
# transform testing data and return the matrix
|
# transform testing data and return the matrix
|
||||||
testing_data = cv.transform(X[test]).toarray()
|
testing_data = cv.transform(X[test]).toarray()
|
||||||
|
@ -172,4 +182,24 @@ class MultinomialNaiveBayes:
|
||||||
print(y_test[i])
|
print(y_test[i])
|
||||||
print()
|
print()
|
||||||
#print metrics
|
#print metrics
|
||||||
print('F1 score: ', format(f1_score(y_test, predictions)))
|
print('F1 score: ', format(f1_score(y_test, predictions)))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
# read csv file
|
||||||
|
print('# reading dataset')
|
||||||
|
print('# ...')
|
||||||
|
|
||||||
|
# read current data set from csv
|
||||||
|
df = pd.read_csv('../data/interactive_labeling_round_11.csv',
|
||||||
|
sep='|',
|
||||||
|
usecols=range(1,13), # drop first column 'unnamed'
|
||||||
|
encoding='utf-8',
|
||||||
|
quoting=csv.QUOTE_NONNUMERIC,
|
||||||
|
quotechar='\'')
|
||||||
|
|
||||||
|
# select only labeled articles
|
||||||
|
#print('Anzahl aller gelabelten:')
|
||||||
|
#print(len(df.loc[df['Label'] != -1]))
|
||||||
|
#print(df.loc[df['Label'] != -1][:5])
|
||||||
|
MultinomialNaiveBayes.make_mnb(df.loc[df['Label'] != -1].reindex(), sklearn_cv=True, percentile=100)
|
|
@ -1,7 +1,7 @@
|
||||||
%!PS-Adobe-3.0 EPSF-3.0
|
%!PS-Adobe-3.0 EPSF-3.0
|
||||||
%%Title: ..\visualization\proba_round_11.eps
|
%%Title: ..\visualization\proba_round_11.eps
|
||||||
%%Creator: matplotlib version 3.0.2, http://matplotlib.org/
|
%%Creator: matplotlib version 3.0.2, http://matplotlib.org/
|
||||||
%%CreationDate: Thu Feb 21 14:11:04 2019
|
%%CreationDate: Tue Mar 5 08:51:48 2019
|
||||||
%%Orientation: portrait
|
%%Orientation: portrait
|
||||||
%%BoundingBox: 18 252 594 540
|
%%BoundingBox: 18 252 594 540
|
||||||
%%EndComments
|
%%EndComments
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
Loading…
Reference in New Issue