Commit 7e06e29c by Ryan Lee

Docstrings added

parent f77600a7
Showing with 78 additions and 0 deletions
......@@ -93,6 +93,12 @@ class SearchResults:
def get_search_soup(self, query):
"""
Take a string of search terms and return a soup object
Input: query
Returns: soup object
"""
search = '+'.join(query.split())
response = requests.get('https://www.wikihow.com/wikiHowTo?search=' + search)
soup = BeautifulSoup(response.content, 'html.parser')
......@@ -100,6 +106,13 @@ class SearchResults:
def passes_outer_filters(self, outer_article):
"""
Using an outer_article object, check to make sure the
article fits the chosen filters
Input: outer_article object
Returns: Boolean representing if the article meets the given conditions
"""
if 'date_updated' in self.__filters:
num, interval = outer_article.date_updated.split()[:2]
num = int(num)
......@@ -134,6 +147,13 @@ class SearchResults:
def get_required_words(self, query):
"""
Given a string of words required for the results, split
and return as a list
Input: (str) query representing words to look for
Returns: List of search terms
"""
required_words = []
match = re.findall('".*"', query)
if match == []:
......@@ -144,6 +164,13 @@ class SearchResults:
def get_forbidden_words(self, query):
"""
Given a string of words used to filter out results, split
and return as a list
Input: (str) query representing words to avoid in results
Returns: List of forbidden words
"""
forbidden_words = []
match = re.search('-.*$', query)
if match == None:
......@@ -156,6 +183,13 @@ class SearchResults:
def passes_all_filters(self, article):
"""
Given an article object, check to make sure that the
article matches the conditions set by the filters
Input: article object
Returns: boolean representing if the conditions have been met
"""
if 'child_safe' in self.__filters:
if self.__filters['child_safe']:
self.__forbidden_words += inappropriate_words_lst
......@@ -193,6 +227,11 @@ class SearchResults:
def __repr__(self, limit_results = None):
"""
repr method for SearchResults class
Input: (int) limit_results max number of results to show
(will show all results if = None)
"""
info = 'Results for ' + self.__query + ':\n'
if limit_results != None:
results = self.articles[:limit_results]
......@@ -208,6 +247,11 @@ class SearchResults:
class OuterArticle:
def __init__(self, result):
"""
Class constructor for OuterArticle class
Input: result = a soup object representing a search directory
"""
self.title = result.find('div', class_ = 'result_title').text
self.date_updated = ' '.join(result.find\
('li', class_ = 'sr_updated').text.split()[1:])
......@@ -225,6 +269,11 @@ class Article:
def __init__(self, url):
"""
Class constructor for the Article class
Input: url = url for a specific article
"""
self.__soup = self.get_page_soup(url)
self.title = self.__soup.find('h1').text
self.sources = self.get_sources(self.__soup)
......@@ -248,11 +297,23 @@ class Article:
self.related_categories = []
def get_page_soup(self, url):
    """
    Fetch the page at the given url and parse it into a soup object.
    Input: url of a specific article
    Returns: BeautifulSoup object for the fetched page
    """
    # One GET request per article; parsing uses the stdlib html.parser backend.
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    return soup
def get_sources(self, soup):
"""
Take a soup object of an article and return the cited sources
Input: wikihow article soup object
Returns: list of cited sources
"""
divs = soup.find('div', id='references')
try:
sources = [s['href'] for s in\
......@@ -264,6 +325,12 @@ class Article:
def get_author(self, soup):
"""
Take a soup object of an article and return the main author name
Input: wikihow article soup object
Returns: Name of main author
"""
div = soup.find('a', class_ = "sp_namelink")
try:
name = div.string
......@@ -274,6 +341,14 @@ class Article:
def get_sp_text_data(self, soup):
"""
Take a soup object of an article and return a list containing
number of co-authors, date updated, and views
Input: wikihow article soup object
Returns: list containing number of co-authors, date updated,
and number of views
"""
lst = []
span = soup.find_all('span', class_='sp_text_data')
for x in span:
......@@ -368,6 +443,9 @@ class Article:
def all_peripheral_links(self, soup, url):
"""
"""
related_links = self.get_related(self.__soup, url)
expanding, full_expanse = self.expanding_breadcrumbs(related_links, soup)
link_family = {**related_links, **expanding, **full_expanse}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment