Commit d33638a0 by Michelle Awh

docstrings

parent e7dc01bb
Showing with 13 additions and 0 deletions
......@@ -568,6 +568,9 @@ class Article:
def related_enough(self):
'''
'''
links = self.all_peripheral_links(self.__soup, self.url)
articles, categories = self.extract_titles(links)
actual_related_articles = {}
......@@ -590,6 +593,10 @@ class Article:
def pre_processing(self, wh_page, caps):
'''
Takes a single string representing all of the text on a WikiHow page
and returns it as a list of individual words, with English stopwords removed.
'''
tokenizer = nltk.RegexpTokenizer(r'\w+')
list_of_words = tokenizer.tokenize(wh_page)
list_of_words = [word for word in list_of_words if word not in stopwords.words('english')]
......@@ -597,6 +604,9 @@ class Article:
def n_gram(self, wh_page, caps, n):
'''
Takes the text of a WikiHow page as a single string, splits it into words
with pre_processing, and creates ngrams of size n
'''
n_gram_lst = []
list_of_words = self.pre_processing(wh_page,caps)
start_value = 0
......@@ -609,6 +619,9 @@ class Article:
def find_common_words(self, string):
'''
Takes a string, breaks it into single-word ngrams with n_gram, and finds
the most common entries.
'''
words_lst = Counter()
string_lst = self.n_gram(string, False, 1)
words_lst.update(w for w in string_lst)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment