import pickle

import matplotlib
import matplotlib.pyplot as plt
import numpy as np


class LabelingPlotter():
    """Namespace for the plots that visualize the labeling experiment.

    Both plotting routines are static: they read no instance state and are
    meant to be called directly on the class.
    """

    @staticmethod
    def plot_labeling_rounds():
        """Plot the hard-coded per-round labeling statistics.

        Draws three stacked line charts over the round number (error rate,
        fraction of manual labels per class, fraction of estimated labels per
        class), saves the figure as a PNG and then shows it.
        """
        # round numbers
        rounds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

        # wrongly estimated labels per round, relative to 100
        wrong = [0/100, 19/100, 17/100, 16/100, 20/100,
                 12/100, 10/100, 20/100, 14/100, 12/100]

        # manually classified articles per class and round; the denominator
        # grows by 100 with every round
        man_0 = [84/100, 165/200, 247/300, 329/400, 410/500,
                 498/600, 586/700, 662/800, 741/900, 821/1000]
        man_1 = [3/100, 7/200, 12/300, 16/400, 20/500,
                 22/600, 23/700, 29/800, 37/900, 39/1000]
        man_2 = [13/100, 28/200, 41/300, 55/400, 70/500,
                 80/600, 91/700, 109/800, 122/900, 140/1000]

        # estimated labels per class and round; the denominator shrinks by
        # 100 with every round
        # NOTE(review): 9735/9500 in est_0 is larger than 1, which looks like
        # a data entry error -- confirm against the source numbers.
        est_0 = [9873/9900, 9757/9800, 9603/9700, 9470/9600, 9735/9500,
                 9238/9400, 9107/9300, 8007/9200, 8064/9100, 7641/9000]
        est_1 = [14/9900, 15/9800, 11/9700, 11/9600, 16/9500,
                 17/9400, 18/9300, 19/9200, 18/9100, 20/9000]
        est_2 = [12/9900, 26/9800, 77/9700, 94/9600, 380/9500,
                 123/9400, 147/9300, 676/9200, 595/9100, 837/9000]

        class_labels = ('class 0', 'class 1', 'class 2')

        fig, ax = plt.subplots(3, 1)

        ax[0].plot(rounds, wrong)
        ax[0].set_ylabel('Error rate')

        ax[1].plot(rounds, man_0, rounds, man_1, rounds, man_2)
        ax[1].set_ylabel('Fraction of manual labels')

        ax[2].plot(rounds, est_0, rounds, est_1, rounds, est_2)
        ax[2].set_ylabel('Fraction of estimated labels')
        # only the bottom panel carries the x axis label
        ax[2].set_xlabel('Iteration number')

        # limit x axis to rounds 1..9; round 0 is plotted but lies outside
        # these bounds
        for axis in ax:
            axis.set_xbound(lower=1, upper=9)

        # start the y axis at zero for the two upper panels only
        ax[0].set_ybound(lower=0)
        ax[1].set_ybound(lower=0)

        # insert legend
        ax[1].legend(class_labels)
        ax[2].legend(class_labels)

        fig.tight_layout()

        # Forward slashes are accepted on Windows as well; the previous
        # backslash path was only treated as a directory path on Windows.
        fig.savefig('../visualization/Labeling_Grafik_070219.png')
        plt.show()

    @staticmethod
    def plot_cumulative():
        """Plot a cumulative histogram of the pickled probabilities.

        Loads the object stored in ``../obj/array_class_probs_round_9.pkl``,
        sorts it in descending order, and shows its normalized cumulative
        step histogram with 50 bins.
        """
        # load pickle object
        # SECURITY: pickle.load can execute arbitrary code; only use this on
        # pickle files that were produced by a trusted source.
        with open('../obj/array_class_probs_round_9.pkl', 'rb') as pickle_file:
            class_probs = pickle.load(pickle_file)

        # sort in descending order and convert to an array
        probas = np.asarray(sorted(class_probs, reverse=True))

        n_bins = 50

        fig, ax = plt.subplots(figsize=(8, 4))

        # plot the cumulative histogram; the label gives ax.legend() an entry
        # to show (without any labeled artist the legend has no entries)
        # NOTE(review): with histtype='step' the outline is unfilled, so
        # facecolor likely has no visible effect -- confirm whether
        # color='darkred' was intended.
        ax.hist(probas, n_bins, density=True, histtype='step',
                cumulative=True, facecolor='darkred',
                label='Cumulative distribution')

        ax.grid(True)
        ax.legend(loc='right')
        ax.set_xlabel('Highest estimated probability')
        ax.set_ylabel('Fraction of articles with this highest estimated probability')

        plt.show()


if __name__ == '__main__':
    LabelingPlotter.plot_cumulative()