Commit 7e06e29c by Ryan Lee

Docstrings added

parent f77600a7
Showing with 78 additions and 0 deletions
......@@ -93,6 +93,12 @@ class SearchResults:
def get_search_soup(self, query):
"""
Take a string of search terms and return a soup object
Input: query
Returns: soup object
"""
search = '+'.join(query.split())
response = requests.get('https://www.wikihow.com/wikiHowTo?search=' + search)
soup = BeautifulSoup(response.content, 'html.parser')
......@@ -100,6 +106,13 @@ class SearchResults:
def passes_outer_filters(self, outer_article):
"""
Using an outer_article object, check to make sure the
article fits the chosen filters
Input: outer_article object
Returns: Boolean representing if the article meets the given conditions
"""
if 'date_updated' in self.__filters:
num, interval = outer_article.date_updated.split()[:2]
num = int(num)
......@@ -134,6 +147,13 @@ class SearchResults:
def get_required_words(self, query):
"""
Given a string of words required for the results, split
and return as a list
Input: (str) query representing words to look for
Returns: List of search terms
"""
required_words = []
match = re.findall('".*"', query)
if match == []:
......@@ -144,6 +164,13 @@ class SearchResults:
def get_forbidden_words(self, query):
"""
Given a string of words used to filter out results, split
and return as a list
Input: (str) query representing words to avoid in results
Returns: List of forbidden words
"""
forbidden_words = []
match = re.search('-.*$', query)
if match == None:
......@@ -156,6 +183,13 @@ class SearchResults:
def passes_all_filters(self, article):
"""
Given an article object, check to make sure that the
article matches the conditions set by the filters
Input: article object
Returns: boolean representing if the conditions have been met
"""
if 'child_safe' in self.__filters:
if self.__filters['child_safe']:
self.__forbidden_words += inappropriate_words_lst
......@@ -193,6 +227,11 @@ class SearchResults:
def __repr__(self, limit_results = None):
"""
repr method for SearchResults class
Input: (int) limit_results max number of results to show
(will show all results if = None)
"""
info = 'Results for ' + self.__query + ':\n'
if limit_results != None:
results = self.articles[:limit_results]
......@@ -208,6 +247,11 @@ class SearchResults:
class OuterArticle:
def __init__(self, result):
"""
Class constructor for OuterArticle class
Input: result = a soup object representing a search directory
"""
self.title = result.find('div', class_ = 'result_title').text
self.date_updated = ' '.join(result.find\
('li', class_ = 'sr_updated').text.split()[1:])
......@@ -225,6 +269,11 @@ class Article:
def __init__(self, url):
"""
Class constructor for the Article class
Input: url = url for a specific article
"""
self.__soup = self.get_page_soup(url)
self.title = self.__soup.find('h1').text
self.sources = self.get_sources(self.__soup)
......@@ -248,11 +297,23 @@ class Article:
self.related_categories = []
def get_page_soup(self, url):
    """
    Fetch the page at the given url and parse it into a soup object.
    Input: url of a specific article
    Returns: BeautifulSoup object for the fetched page
    """
    # One GET request per article; parsing uses the stdlib html.parser backend.
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    return soup
def get_sources(self, soup):
"""
Take a soup object of an article and return the cited sources
Input: wikihow article soup object
Returns: list of cited sources
"""
divs = soup.find('div', id='references')
try:
sources = [s['href'] for s in\
......@@ -264,6 +325,12 @@ class Article:
def get_author(self, soup):
"""
Take a soup object of an article and return the main author name
Input: wikihow article soup object
Returns: Name of main author
"""
div = soup.find('a', class_ = "sp_namelink")
try:
name = div.string
......@@ -274,6 +341,14 @@ class Article:
def get_sp_text_data(self, soup):
"""
Take a soup object of an article and return a list containing
number of co-authors, date updated, and views
Input: wikihow article soup object
Returns: list containing number of co-authors, date updated,
and number of views
"""
lst = []
span = soup.find_all('span', class_='sp_text_data')
for x in span:
......@@ -368,6 +443,9 @@ class Article:
def all_peripheral_links(self, soup, url):
"""
"""
related_links = self.get_related(self.__soup, url)
expanding, full_expanse = self.expanding_breadcrumbs(related_links, soup)
link_family = {**related_links, **expanding, **full_expanse}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment