# thesis-anne/src/LabelingPlotter.py
#
# 92 lines
# 2.8 KiB
# Python

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pickle
class LabelingPlotter():
    """Collection of plots describing the iterative labeling process.

    The class holds no state; both plotting routines are static methods, so
    they can be called on the class itself (``LabelingPlotter.plot_...()``)
    as well as on an instance.
    """

    @staticmethod
    def plot_labeling_rounds():
        """Plot error rate and label fractions for labeling rounds 0-9.

        Draws three stacked line charts (error rate, fraction of manual
        labels per class, fraction of estimated labels per class), saves the
        figure to ``../visualization/Labeling_Grafik_070219.png`` relative to
        the current working directory, and then shows it.
        """
        # round numbers
        rounds = list(range(10))
        # fraction of wrongly estimated labels per round
        wrong = [0/100, 19/100, 17/100, 16/100, 20/100,
                 12/100, 10/100, 20/100, 14/100, 12/100]
        # fraction of manually classified articles per class and round
        man_0 = [84/100, 165/200, 247/300, 329/400, 410/500,
                 498/600, 586/700, 662/800, 741/900, 821/1000]
        man_1 = [3/100, 7/200, 12/300, 16/400, 20/500,
                 22/600, 23/700, 29/800, 37/900, 39/1000]
        man_2 = [13/100, 28/200, 41/300, 55/400, 70/500,
                 80/600, 91/700, 109/800, 122/900, 140/1000]
        # fraction of estimated labels per class and round
        # NOTE(review): 9735/9500 is greater than 1, which looks like a typo
        # in the recorded count -- confirm against the original results.
        est_0 = [9873/9900, 9757/9800, 9603/9700, 9470/9600, 9735/9500,
                 9238/9400, 9107/9300, 8007/9200, 8064/9100, 7641/9000]
        est_1 = [14/9900, 15/9800, 11/9700, 11/9600, 16/9500,
                 17/9400, 18/9300, 19/9200, 18/9100, 20/9000]
        est_2 = [12/9900, 26/9800, 77/9700, 94/9600, 380/9500,
                 123/9400, 147/9300, 676/9200, 595/9100, 837/9000]

        class_names = ('class 0', 'class 1', 'class 2')

        fig, ax = plt.subplots(3, 1)

        ax[0].plot(rounds, wrong)
        ax[0].set_ylabel('Error rate')

        ax[1].plot(rounds, man_0, rounds, man_1, rounds, man_2)
        ax[1].set_ylabel('Fraction of manual labels')

        ax[2].plot(rounds, est_0, rounds, est_1, rounds, est_2)
        ax[2].set_ylabel('Fraction of estimated labels')
        # only the bottom chart carries the shared x axis label
        ax[2].set_xlabel('Iteration number')

        # limit x axis (round 0 is cut off in all three charts)
        for axis in ax:
            axis.set_xbound(lower=1, upper=9)
        # y axis starts at zero for the first two charts only
        ax[0].set_ybound(lower=0)
        ax[1].set_ybound(lower=0)

        # insert legend
        ax[1].legend(class_names)
        ax[2].legend(class_names)

        fig.tight_layout()
        # Forward slashes work on Windows as well as on POSIX systems; the
        # former backslash path was only valid on Windows.
        fig.savefig('../visualization/Labeling_Grafik_070219.png')
        plt.show()

    @staticmethod
    def plot_cumulative():
        """Show a cumulative histogram of the pickled probabilities.

        Loads ``../obj/array_class_probs_round_9.pkl`` (relative to the
        current working directory), which must contain a list supporting
        ``sort(reverse=True)``, and plots its values as a normalized
        cumulative step histogram with 50 bins.
        """
        # SECURITY NOTE: pickle.load can execute arbitrary code contained in
        # the file -- only load pickle files from a trusted source.
        with open('../obj/array_class_probs_round_9.pkl', 'rb') as pickle_file:
            class_probs = pickle.load(pickle_file)

        # sort in descending order, then convert to an array
        class_probs.sort(reverse=True)
        probas = np.asarray(class_probs)

        n_bins = 50

        fig, ax = plt.subplots(figsize=(8, 4))

        # plot the cumulative histogram; the label gives the legend below an
        # entry to show (without it the legend is empty and matplotlib warns)
        ax.hist(probas, n_bins, density=True, histtype='step',
                cumulative=True, facecolor='darkred',
                label='Cumulative distribution')

        ax.grid(True)
        ax.legend(loc='right')
        ax.set_xlabel('Highest estimated probability')
        ax.set_ylabel('Fraction of articles with this highest estimated probability')

        plt.show()
if __name__ == "__main__":
    # Script entry point: show the cumulative probability histogram.
    LabelingPlotter.plot_cumulative()