cleaned dataset
This commit is contained in:
parent
b7d1f546e4
commit
2243a50ed0
4
NER.py
4
NER.py
|
@ -157,8 +157,8 @@ if __name__ == '__main__':
|
||||||
header=None,
|
header=None,
|
||||||
index_col=None,
|
index_col=None,
|
||||||
engine='python',
|
engine='python',
|
||||||
#usecols=[1,2],
|
# usecols=[1,2],
|
||||||
nrows=100,
|
# nrows=100,
|
||||||
quoting=csv.QUOTE_NONNUMERIC,
|
quoting=csv.QUOTE_NONNUMERIC,
|
||||||
quotechar='\'')
|
quotechar='\'')
|
||||||
#print(df)
|
#print(df)
|
||||||
|
|
|
@ -97,7 +97,7 @@ class VisualizerNews:
|
||||||
# texts = df_hits['Title'] + '. ' + df_hits['Text']
|
# texts = df_hits['Title'] + '. ' + df_hits['Text']
|
||||||
texts = df[1] + '. ' + df[2]
|
texts = df[1] + '. ' + df[2]
|
||||||
|
|
||||||
# dict: count articles with company names
|
# list: count articles with company names
|
||||||
count_names = NER.count_companies(texts)
|
count_names = NER.count_companies(texts)
|
||||||
|
|
||||||
# sort list in descending order
|
# sort list in descending order
|
||||||
|
@ -265,6 +265,11 @@ class VisualizerNews:
|
||||||
.format(VisualizerNews.datestring))
|
.format(VisualizerNews.datestring))
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
def plot_hist_num_comp_per_art():
|
||||||
|
''' open pkl file of dict, plot histogram of number of different
|
||||||
|
company names per article.
|
||||||
|
'''
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
VisualizerNews.plot_wordcloud_dataset()
|
VisualizerNews.plot_wordcloud_dataset()
|
||||||
# VisualizerNews.plot_histogram_companies()
|
# VisualizerNews.plot_histogram_companies()
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue