diff --git a/BagOfWords.py b/BagOfWords.py
index d5b5720..b98bc6f 100644
--- a/BagOfWords.py
+++ b/BagOfWords.py
@@ -15,6 +15,7 @@ from collections import OrderedDict
 import csv
 import re
+import numpy as np
 import pandas as pd
 from nltk.stem.porter import PorterStemmer
@@ -48,6 +49,48 @@ class BagOfWords:
             words_cleaned.append(word)
         return words_cleaned
 
+    # def make_matrix(series, vocab, relative_word_frequencies=True, stemming=True):
+    #     '''calculates word stem frequencies in input articles. returns
+    #     document term matrix (DataFrame) with relative word frequencies
+    #     (0 <= values < 1) if relative_word_frequencies=True or absolute
+    #     word frequencies (int) if relative_word_frequencies=False.
+    #     (rows: different articles, columns: different words in vocab)
+    #     returns matrix as DataFrame
+    #     '''
+    #     print('# BOW: calculating matrix...')
+    #     print()
+    #     # create list of tuples
+    #     vectors = []
+    #     # for every text in series
+    #     for i in range(len(series)):
+    #         # extract text of single article
+    #         text = series.iloc[i]
+    #         # extract its words
+    #         words = BagOfWords.extract_words(text, stemming)
+    #         # count words in single article
+    #         word_count = len(words)
+    #         vector = []
+    #         for i, v in enumerate(vocab):
+    #             vector.append(0)
+    #             for w in words:
+    #                 if w == v:
+    #                     if relative_word_frequencies:
+    #                         # relative word frequency
+    #                         vector[i] += 1/word_count
+    #                     else:
+    #                         # absolute word frequency
+    #                         vector[i] += 1
+
+    #         # !!! the MemoryError always happens here: !!!
+
+    #         # add single vector as tuple
+    #         vectors.append(tuple(vector))
+    #     df_vectors = pd.DataFrame.from_records(vectors,
+    #                                            index=None,
+    #                                            #header=vocab,
+    #                                            columns=vocab)
+    #     return df_vectors
+
     def make_matrix(series, vocab, relative_word_frequencies=True, stemming=True):
         '''calculates word stem frequencies in input articles. returns
         document term matrix (DataFrame) with relative word frequencies
@@ -58,34 +101,35 @@ class BagOfWords:
         '''
         print('# BOW: calculating matrix...')
         print()
-        # create list of tuples
-        vectors = []
+        # create zero-filled dataframe
+        array = np.zeros(shape=(len(series), len(vocab)))
+        df_matrix = pd.DataFrame(array, columns=vocab)
+
         # for every text in series
         for i in range(len(series)):
+            # extract text of single article
             text = series.iloc[i]
+            # extract its words
             words = BagOfWords.extract_words(text, stemming)
-            # count words in single article
+            # count words in article
             word_count = len(words)
-            vector = []
-            for i, v in enumerate(vocab):
-                vector.append(0)
+
+            # for every word in global vocab
+            for v in vocab:
+                # for every word in article
                 for w in words:
+                    # find right position
                     if w == v:
                         if relative_word_frequencies:
                             # relative word frequency
-                            vector[i] += 1/word_count
+                            df_matrix.loc[i, v] += 1/word_count
                         else:
                             # absolute word frequency
-                            vector[i] += 1
-            # add single vector as tuple
-            vectors.append(tuple(vector))
-        df_vectors = pd.DataFrame.from_records(vectors,
-                                               index=None,
-                                               #header=vocab,
-                                               columns=vocab)
-        return df_vectors
+                            df_matrix.loc[i, v] += 1
+
+        return df_matrix
 
     def make_vocab(series, stemming=True):
         '''adds words of input articles to a global vocabulary.
@@ -158,10 +202,14 @@ class BagOfWords:
         # transform list to set to eliminate duplicates
         return set(stop_words)
 
-    def make_dict_common_words(texts, rel_freq=False, stemming=True, n=200):
+    def make_dict_common_words(texts, rel_freq=True, stemming=True, n=200):
         '''texts: df of article texts of complete data set as series,
         return dict of words with their count.
         '''
+        # words with total frequency below this limit are not included
+        limit = 0.0005
+        if not rel_freq:
+            limit = 25
         # word => count
         dict = {}
         vocab = BagOfWords.make_vocab(texts, stemming)
@@ -171,7 +219,8 @@ class BagOfWords:
         # iterate over words
         for column in df_matrix:
             # count word mentions in total
-            dict[column] = df_matrix[column].sum()
+            if df_matrix[column].sum() > limit:
+                dict[column] = df_matrix[column].sum()
         # sort dict by value, descending
         o_dict = OrderedDict(sorted(dict.items(), key=lambda t: t[1],\
                              reverse=True))
@@ -182,9 +231,19 @@ class BagOfWords:
         return n_dict
 
     def count_features(texts, stemming=True):
+        print('# counting all features in corpus...')
+        print()
         vocab = BagOfWords.make_vocab(texts, stemming)
         return len(vocab)
 
+    def count_all_words(texts):
+        print('# counting all words in corpus...')
+        print()
+        total = 0
+        for text in texts:
+            total += len(text.split())
+        return total
+
 if __name__ == '__main__':
     # load new data set
@@ -195,16 +254,16 @@
                              index_col=None,
                              engine='python',
                              usecols=[1,2],
-                             #nrows=10,
+                             nrows=3000,
                              quoting=csv.QUOTE_NONNUMERIC,
                              quotechar='\'')
 
     # find most common words in dataset
     corpus = df_dataset[1] + '. ' + df_dataset[2]
-    # stemming = False
-    # vocab = BagOfWords.make_vocab(corpus, stemming)
-    # print(vocab)
-    # print()
+    stemming = False
+    rel_freq = False
+    vocab = BagOfWords.make_vocab(corpus, stemming)
+
     # print(BagOfWords.make_matrix(corpus, vocab, False, stemming))
-    print(BagOfWords.make_dict_common_words(corpus, False, stemming, 200))
-    print(BagOfWords.count_features(corpus))
\ No newline at end of file
+    print(BagOfWords.make_dict_common_words(corpus, rel_freq, stemming, 200))
+    # print(BagOfWords.count_features(corpus))
\ No newline at end of file
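
Review note, not part of the patch: the rewritten make_matrix above avoids the MemoryError by preallocating a dense numpy array, but it still compares every article word against every vocab word and updates cells one at a time through DataFrame indexing. Below is a minimal sketch of a faster variant (hypothetical name make_matrix_counter) that does a single Counter lookup per cell and fills the numpy array directly; it assumes vocab is an ordered sequence, such as a sorted list produced by make_vocab:

```python
from collections import Counter

import numpy as np
import pandas as pd

from BagOfWords import BagOfWords

def make_matrix_counter(series, vocab, relative_word_frequencies=True,
                        stemming=True):
    # hypothetical alternative, not what the patch implements
    matrix = np.zeros(shape=(len(series), len(vocab)))
    for i in range(len(series)):
        words = BagOfWords.extract_words(series.iloc[i], stemming)
        counts = Counter(words)  # word -> absolute count, 0 if absent
        denom = len(words) if relative_word_frequencies else 1
        for j, v in enumerate(vocab):
            if counts[v]:
                matrix[i, j] = counts[v] / denom
    return pd.DataFrame(matrix, columns=vocab)
```

Writing into the array and wrapping it in a DataFrame once at the end also sidesteps the repeated df_matrix.loc writes, which are the slowest part of the patched loop.
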
diff --git a/VisualizerNews.py b/VisualizerNews.py
index 97fe238..31724ab 100644
--- a/VisualizerNews.py
+++ b/VisualizerNews.py
@@ -10,6 +10,7 @@ from NER import NER
 import csv
 from os import path
+import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -43,7 +44,10 @@ class VisualizerNews:
                                                  stemming=False,
                                                  n=200)
 
-        wordcloud = WordCloud(width=2400, height=1200, scale=2,
+        wordcloud = WordCloud(background_color='white',
+                              width=2400,
+                              height=1200,
+                              scale=2,
                               # true if bigram:
                               collocations=False).generate_from_frequencies(dict)
@@ -72,7 +76,7 @@ class VisualizerNews:
         # only articles with label==1
         df_hits = df[df['Label'] == 1]
 
-        texts = df_hits['Title'] + ' ' + df_hits['Text']
+        texts = df_hits['Title'] + '. ' + df_hits['Text']
 
         # # read a few to check
         # for text in texts[10:20]:
@@ -93,7 +97,7 @@ class VisualizerNews:
         # Number of companies with this number of mentions
         plt.ylabel('Number of companies with this number of articles')
         num_bins = 50
-        n, bins, patches = plt.hist(names, num_bins, facecolor='blue', alpha=0.5)
+        n, bins, patches = plt.hist(names, num_bins, facecolor='darkred', alpha=0.5)
         # plt.grid(True)
         plt.show()
@@ -132,13 +136,16 @@ class VisualizerNews:
         # convert list to array
         names = np.asarray(count_chars)
         # plt.title('Length of News Articles')
-        plt.xlabel('Number of Characters in an Article')
+        plt.xlabel('Number of characters in an article')
         plt.ylabel('Frequency')
         # number of vertical bins
         num_bins = 200
-        n, bins, patches = plt.hist(names, num_bins, facecolor='blue', alpha=0.5)
+        n, bins, patches = plt.hist(names, num_bins, facecolor='darkslategrey', alpha=0.5)
         # [xmin, xmax, ymin, ymax] of axis
-        plt.axis([300, 10000, 0, 500])
+        #plt.axis([format(300, ','),format(10000, ','), 0, 500])
+        plt.axis([300, 10000, 0, 500])
+        # format x axis labels for thousands (e.g. '10,000')
+        plt.gca().xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
         plt.show()
 
     def plot_pie_chart_of_sites():
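
Aside on the tick formatter added in the hunk above: matplotlib.ticker.FuncFormatter wraps any callable taking (value, position). A self-contained sketch with made-up sample data, for reference only:

```python
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

fig, ax = plt.subplots()
ax.hist([1200, 3300, 5400, 7600, 9800], bins=5)
# render x tick labels with a thousands separator, e.g. 10000 -> '10,000'
ax.xaxis.set_major_formatter(
    ticker.FuncFormatter(lambda x, pos: format(int(x), ',')))
plt.show()
```

ticker.StrMethodFormatter('{x:,.0f}') gives the same result without a lambda.
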
@@ -191,7 +198,7 @@ class VisualizerNews:
                                  #usecols=[1,2],
                                  index_col=None,
                                  engine='python',
-                                 #nrows=100,
+                                 #nrows=1000,
                                  quoting=csv.QUOTE_NONNUMERIC,
                                  quotechar='\'')
@@ -199,22 +206,25 @@ class VisualizerNews:
         # find most common words in dataset
         dict = BagOfWords.make_dict_common_words(corpus,
-                                                 rel_freq=False,
+                                                 rel_freq=True,
                                                  stemming=False,
                                                  n=n_commons)
 
-        plt.xlabel('Most Common Words in News Articles')
-        plt.ylabel('Frequency')
+        plt.xlabel('Most common words in textual corpus')
+        plt.ylabel('Relative frequency')
 
         labels = list(dict.keys())
         numbers = list(dict.values())
         nbars = n_commons
-        plt.bar(np.arange(nbars), height=numbers, tick_label=labels)
+        plt.bar(np.arange(nbars),
+                height=numbers,
+                tick_label=labels,
+                facecolor='darkorange')
         plt.show()
 
 if __name__ == '__main__':
     # VisualizerNews.plot_histogram_companies()
     # VisualizerNews.plot_wordcloud_dataset()
     # VisualizerNews.plot_histogram_text_lengths()
-    VisualizerNews.plot_pie_chart_of_sites()
-    # VisualizerNews.plot_hist_most_common_words()
\ No newline at end of file
+    # VisualizerNews.plot_pie_chart_of_sites()
+    VisualizerNews.plot_hist_most_common_words()
\ No newline at end of file
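
Closing note, not part of the patch: both make_matrix versions build a dense len(series) x len(vocab) float64 matrix (8 bytes per cell), which is what originally ran out of memory. The standard escape is a sparse document-term matrix. Below is a sketch using scikit-learn's CountVectorizer; note that its default tokenizer is not BagOfWords.extract_words (no Porter stemming, different stop word handling), so the counts would differ:

```python
from sklearn.feature_extraction.text import CountVectorizer

def make_sparse_matrix(texts, relative_word_frequencies=True):
    # hypothetical helper, not part of this patch
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(texts)  # scipy sparse, shape (n_docs, n_terms)
    if relative_word_frequencies:
        row_sums = X.sum(axis=1)         # words per document
        row_sums[row_sums == 0] = 1      # avoid division by zero
        X = X.multiply(1.0 / row_sums).tocsr()
    return X, vectorizer
```

The term-to-column mapping is available afterwards as vectorizer.vocabulary_, so per-word totals for make_dict_common_words-style rankings come from X.sum(axis=0) without ever densifying the matrix.
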