a

c4472f13 · Michelle Awh · c72c2a34 · 7e06e29c · c4472f13
Commit c4472f13 authored Mar 12, 2021 by Michelle Awh
Showing with 81 additions and 0 deletions
SearchResults.py
--- a/SearchResults.py
+++ b/SearchResults.py
@@ -94,6 +94,12 @@ class SearchResults:


    def get_search_soup(self, query):
+        """
+        Run a wikiHow search for the given terms and parse the results page
+
+        Input: query
+        Returns: soup object
+        """
        search = '+'.join(query.split())
        response = requests.get('https://www.wikihow.com/wikiHowTo?search=' + search)
        soup = BeautifulSoup(response.content, 'html.parser')
@@ -101,6 +107,13 @@ class SearchResults:

    
    def passes_outer_filters(self, outer_article):
+        """
+        Using an outer_article object, check to make sure the
+        article fits the chosen filters
+
+        Input: outer_article object
+        Returns: Boolean representing if the article meets the given conditions
+        """
        if 'date_updated' in self.__filters:
            num, interval = outer_article.date_updated.split()[:2]
            num = int(num)
@@ -125,6 +138,13 @@ class SearchResults:
            

    def get_required_words(self, query):
+        """
+        Given a search query, pull out the words required in the
+        results (looked up as double-quoted text) and return a list
+
+        Input: (str) query representing words to look for
+        Returns: List of search terms
+        """
        required_words = []
        match = re.findall('".*"', query)
        if match == []:
@@ -135,6 +155,13 @@ class SearchResults:


    def get_forbidden_words(self, query):
+        """
+        Given a search query, pull out the words used to filter out
+        results (the text from the first '-' onward) and return a list
+
+        Input: (str) query representing words to avoid in results
+        Returns: List of forbidden words
+        """
        forbidden_words = []
        match = re.search('-.*$', query)
        if match == None:
@@ -146,8 +173,18 @@ class SearchResults:


    def passes_all_filters(self, article):
+<<<<<<< HEAD
        if 'Main-Page' in article.url:
            return False
+=======
+        """
+        Given an article object, check to make sure that the 
+        article matches the conditions set by the filters
+
+        Input: article object
+        Returns: boolean representing if the conditions have been met
+        """
+>>>>>>> 7e06e29c9767fc90bea8e3028de2e61851c4c33c
        if 'child_safe' in self.__filters:
            if self.__filters['child_safe']:
                self.__forbidden_words += inappropriate_words_lst
@@ -197,6 +234,11 @@ class SearchResults:


    def __repr__(self, limit_results = None):
+        """
+        repr method for SearchResults class
+        Input: (int) limit_results max number of results to show
+            (will show all results if = None)
+        """
        info = 'Results for ' + self.__query + ':\n'
        if limit_results != None:
            results = self.articles[:limit_results]
@@ -212,6 +254,11 @@ class SearchResults:
 class OuterArticle:
    
    def __init__(self, result):
+        """
+        Class constructor for OuterArticle class
+
+        Input: result = soup object for a search result (title, update date)
+        """
        self.title = result.find('div', class_ = 'result_title').text
        self.date_updated = ' '.join(result.find\
               ('li', class_ = 'sr_updated').text.split()[1:])
@@ -229,6 +276,11 @@ class Article:
     

    def __init__(self, url):
+        """
+        Class constructor for the Article class
+
+        Input: url = url for a specific article
+        """
        self.__soup = self.get_page_soup(url)
        self.title = self.__soup.find('h1').text
        self.sources = self.get_sources(self.__soup)
@@ -249,11 +301,23 @@ class Article:


    def get_page_soup(self, url):
+        """
+        Download the page at url and parse it into a soup object
+
+        Input: url of a specific article
+        Returns: BeautifulSoup object
+        """
        response = requests.get(url)
        return BeautifulSoup(response.content, 'html.parser')


    def get_sources(self, soup):
+        """
+        Take a soup object of an article and return the cited sources
+
+        Input: wikihow article soup object
+        Returns: list of cited sources
+        """
        divs = soup.find('div', id='references')
        try:
            sources = [s['href'] for s in\
@@ -282,6 +346,12 @@ class Article:


    def get_author(self, soup):
+        """
+        Take a soup object of an article and return the main author name
+        
+        Input: wikihow article soup object
+        Returns: Name of main author
+        """
        div = soup.find('a', class_ = "sp_namelink")
        try:
            name = div.string
@@ -292,6 +362,14 @@ class Article:


    def get_sp_text_data(self, soup):
+        """
+        Take a soup object of an article and return a list containing 
+        number of co-authors, date updated, and views
+
+        Input: wikihow article soup object
+        Returns: list containing number of co-authors, date updated, 
+            and number of views
+        """
        lst = []
        span = soup.find_all('span', class_='sp_text_data')
        for x in span:
@@ -392,6 +470,9 @@ class Article:


    def all_peripheral_links(self, soup, url):
+        """
+        Merge the get_related links with both expanding_breadcrumbs results
+        """
        related_links = self.get_related(self.__soup, url)
        expanding, full_expanse = self.expanding_breadcrumbs(related_links, soup)
        link_family = {**related_links, **expanding, **full_expanse}