cleaned dataset

This commit is contained in:
Anne Lorenz 2018-11-05 13:27:52 +01:00
parent b7d1f546e4
commit 2243a50ed0
4 changed files with 8 additions and 20005 deletions

4
NER.py
View File

@ -157,8 +157,8 @@ if __name__ == '__main__':
header=None,
index_col=None,
engine='python',
#usecols=[1,2],
nrows=100,
# usecols=[1,2],
# nrows=100,
quoting=csv.QUOTE_NONNUMERIC,
quotechar='\'')
#print(df)

View File

@ -97,7 +97,7 @@ class VisualizerNews:
# texts = df_hits['Title'] + '. ' + df_hits['Text']
texts = df[1] + '. ' + df[2]
# dict: count articles with company names
# list: count articles with company names
count_names = NER.count_companies(texts)
# sort list in descending order
@ -265,6 +265,11 @@ class VisualizerNews:
.format(VisualizerNews.datestring))
plt.show()
def plot_hist_num_comp_per_art():
    ''' Plot a histogram of the number of different company names
    per article.

    Intended to open the pkl file of the dict and plot from it.
    NOTE(review): stub -- as shown here the body consists of this
    docstring only, so nothing is loaded or plotted yet.
    '''
if __name__ == '__main__':
VisualizerNews.plot_wordcloud_dataset()
# VisualizerNews.plot_histogram_companies()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long