cleaned dataset
This commit is contained in:
parent
b7d1f546e4
commit
2243a50ed0
4
NER.py
4
NER.py
|
@@ -157,8 +157,8 @@ if __name__ == '__main__':
|
|||
header=None,
|
||||
index_col=None,
|
||||
engine='python',
|
||||
#usecols=[1,2],
|
||||
nrows=100,
|
||||
# usecols=[1,2],
|
||||
# nrows=100,
|
||||
quoting=csv.QUOTE_NONNUMERIC,
|
||||
quotechar='\'')
|
||||
#print(df)
|
||||
|
|
|
@@ -97,7 +97,7 @@ class VisualizerNews:
|
|||
# texts = df_hits['Title'] + '. ' + df_hits['Text']
|
||||
texts = df[1] + '. ' + df[2]
|
||||
|
||||
# dict: count articles with company names
|
||||
# list: count articles with company names
|
||||
count_names = NER.count_companies(texts)
|
||||
|
||||
# sort list in descending order
|
||||
|
@@ -265,6 +265,11 @@ class VisualizerNews:
|
|||
.format(VisualizerNews.datestring))
|
||||
plt.show()
|
||||
|
||||
def plot_hist_num_comp_per_art():
|
||||
''' open pkl file of dict, plot histogram of number of different
|
||||
company names per article.
|
||||
'''
|
||||
|
||||
if __name__ == '__main__':
|
||||
VisualizerNews.plot_wordcloud_dataset()
|
||||
# VisualizerNews.plot_histogram_companies()
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue