fixed conflicts
This commit is contained in:
commit
bbcffc9954
|
@ -54,6 +54,7 @@ class FilterKeywords:
|
|||
for kword in keywords:
|
||||
if re.match(kword, key):
|
||||
# if match, increase value of matching key
|
||||
## DORIS: Hier könntest du ein defaultdict verwenden, https://www.accelebrate.com/blog/using-defaultdict-python/
|
||||
if str(kword) in dict_keywords:
|
||||
dict_keywords[str(kword)] += dict_input[key]
|
||||
else:
|
||||
|
|
|
@ -76,6 +76,8 @@ class NaiveBayes:
|
|||
# selector = SelectPercentile(percentile=25)
|
||||
# selector.fit(training_data, y[train])
|
||||
|
||||
##DORIS: WIRD SELECT PERCENTILE IN DEINE ARBEIT MIT NB EINBEZOGEN?
|
||||
|
||||
# training_data_r = selector.transform(training_data)
|
||||
# testing_data_r = selector.transform(testing_data)
|
||||
|
||||
|
@ -95,6 +97,7 @@ class NaiveBayes:
|
|||
rec = recall_score(y[test], predictions_test)
|
||||
print('rec: ' + str(rec))
|
||||
recall_scores.append(rec)
|
||||
##DORIS: PRECISION MUSST DU AUCH MIT DEN TEST SCORES MESSEN (y[test], predictions_test), NICHT MIT DEN TRAINING SCORES!!!
|
||||
prec = precision_score(y[train], predictions_train)
|
||||
print('prec: ' + str(prec))
|
||||
print('#')
|
||||
|
@ -187,6 +190,8 @@ class NaiveBayes:
|
|||
print('# reading dataset')
|
||||
print('# ...')
|
||||
|
||||
## DORIS: ICH VERSTEHE NICHT, WARUM DU HIER EINE EXTRA FUNKTION SCHREIBST, PD.READ_CSV MÜSSTE DOCH AUCH SO GEHEN?
|
||||
## KOMMT VIELLEICHT NOCH, VIELLEICHT BIN ICH ZU VORSCHNELL
|
||||
dataset = CsvHandler.read_csv(file)
|
||||
|
||||
make_naive_bayes(dataset)
|
||||
|
|
|
@ -31,6 +31,7 @@ class NaiveBayes_simple:
|
|||
|
||||
cv = CountVectorizer()
|
||||
|
||||
##DORIS: DU BRAUCHST IMMER EINEN STRATIFIED SPLIT, WEIL DEIN DATASET UNBALANCED IST
|
||||
# k-fold cross-validation as split method
|
||||
kf = KFold(n_splits=10, shuffle=True, random_state=5)
|
||||
|
||||
|
@ -69,6 +70,7 @@ class NaiveBayes_simple:
|
|||
rec = recall_score(y[test], predictions_test)
|
||||
print('rec: ' + str(rec))
|
||||
recall_scores.append(rec)
|
||||
##DORIS: PRECISION MUSST DU AUCH MIT DEN TEST SCORES MESSEN (y[test], predictions_test), NICHT MIT DEN TRAINING SCORES!!!
|
||||
prec = precision_score(y[train], predictions_train)
|
||||
print('prec: ' + str(prec))
|
||||
print('#')
|
||||
|
|
|
@ -87,10 +87,12 @@ class Requester:
|
|||
article.append(section)
|
||||
# add article to list
|
||||
list_articles.append(article)
|
||||
## DORIS: WARUM SCHREIBST DU ES NICHT DIREKT IN EINE CSV, SONDERN KONVERTIERST NOCHMAL?
|
||||
|
||||
# Get the next batch of 100 posts
|
||||
output = webhoseio.get_next()
|
||||
|
||||
|
||||
# create DataFrame
|
||||
df = pd.DataFrame(data=list_articles,
|
||||
columns=['Timestamp', 'Title', 'Text', 'SiteSection'])
|
||||
|
|
Loading…
Reference in New Issue