Commit 68a3a764 by Michelle Awh

sultry gift for u chris

parent 1125f4b9
Showing with 0 additions and 94 deletions
......@@ -14,7 +14,6 @@ import re
import urllib.parse
import sys
import unicodedata
<<<<<<< HEAD
from collections import Counter
import nltk
nltk.download('stopwords')
......@@ -24,28 +23,6 @@ inappropriate_words_lst = inappropriate_words()
=======
def keep_chr(ch):
    '''
    Predicate used to build the PUNCTUATION constant.

    Inputs:
        ch: a single character

    Returns: True if ch is classified as punctuation in Unicode
        (category starting with 'P') and is not one of '#', '@', '&';
        False otherwise.
    '''
    # '#', '@', '&' are meaningful in tweets/URLs, so keep them
    if ch in ("#", "@", "&"):
        return False
    return unicodedata.category(ch).startswith('P')
# All Unicode punctuation characters (except #, @, &), space-joined;
# used as the char set for str.strip() when cleaning tokens.
PUNCTUATION = " ".join([chr(i) for i in range(sys.maxunicode)
if keep_chr(chr(i))])
# Common words (and tokenization fragments like "t", "wh") dropped
# during word-frequency processing.
STOP_WORDS = ["a", "an", "the", "this", "that", "of", "for", "or",
"and", "on", "to", "be", "if", "we", "you", "in", "is",
"at", "it", "rt", "mt", "with", "t", "don", "wh"]
# Tokens starting with any of these prefixes (mentions, hashtags,
# links, HTML-escaped ampersands) are discarded entirely.
STOP_PREFIXES = ("@", "#", "http", "&amp")
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
......@@ -87,7 +64,6 @@ class SearchResults:
Returns:
a SearchResults object
'''
<<<<<<< HEAD
self.__filters = filters
self.__required_words = []
self.__required_words += self.get_required_words(query)
......@@ -98,19 +74,6 @@ class SearchResults:
else:
self.__query = query
self.__soup = self.get_search_soup(self.__query)
=======
self.__query = query
self.__filters = filters
self.__required_words = self.get_required_words(query)
if self.__required_words != None:
self.__filters['required_words'] = self.__required_words
self.__forbidden_words = self.get_forbidden_words(query)
if self.__forbidden_words != None:
self.__filters['forbidden_words'] = self.__forbidden_words
self.__soup = self.get_search_soup(query)
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
self.__links = [a['href'] for a in self.__soup.find_all(\
'a', class_='result_link')]
self.__results = self.__soup.find_all('a', class_ = 'result_link')
......@@ -121,17 +84,12 @@ class SearchResults:
self.__outer_articles = [r for r in self.__unfiltered_outer_articles\
if self.passes_outer_filters(r)]
self.__unfiltered_articles = [Article(r.url) for r in self.__outer_articles]
<<<<<<< HEAD
self.articles = []
for a in self.__unfiltered_articles:
if self.passes_all_filters(a):
self.articles.append(a)
for a in self.articles:
a.related_enough()
=======
self.articles = [a for a in self.__unfiltered_articles if\
self.passes_all_filters(a)]
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
def get_search_soup(self, query):
......@@ -142,11 +100,6 @@ class SearchResults:
def passes_outer_filters(self, outer_article):
<<<<<<< HEAD
print(self.__filters)
print('checking')
=======
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
if 'date_updated' in self.__filters:
num, interval = outer_article.date_updated.split()[:2]
num = int(num)
......@@ -207,16 +160,11 @@ class SearchResults:
if self.__filters['child_safe']:
self.__forbidden_words += inappropriate_words_lst
if 'pct_helpful' in self.__filters:
<<<<<<< HEAD
try:
if int(article.pct_helpful) < self.__filters['pct_helpful']:
return False
except TypeError:
pass
=======
if article.pct_helpful < self.__filters['pct_helpful']:
return False
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
if 'num_voters' in self.__filters:
if int(article.num_voters) < self.__filters['num_voters']:
return False
......@@ -489,7 +437,6 @@ class Article:
links.append(filtered_link)
return links
<<<<<<< HEAD
......@@ -536,12 +483,9 @@ class Article:
self.related_articles += actual_related_articles
self.related_categories += actual_related_categories
=======
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
def pre_processing(self, wh_page, caps):
<<<<<<< HEAD
tokenizer = nltk.RegexpTokenizer(r'\w+')
list_of_words = tokenizer.tokenize(wh_page)
list_of_words = [word for word in list_of_words if word not in stopwords.words('english')]
......@@ -549,33 +493,6 @@ class Article:
def n_gram(self, wh_page, caps, n):
=======
list_of_words = wh_page.split()
list_of_words = [word.strip(PUNCTUATION) for word in list_of_words \
if word.strip(PUNCTUATION) != '']
if not caps:
list_of_words = [word.lower() for word in list_of_words]
list_of_words = [word for word in list_of_words if word not in STOP_WORDS]
list_of_words = [word for word in list_of_words \
if not word.startswith(STOP_PREFIXES)]
return list_of_words
def find_common_words(self, string_lst):
    '''
    Find the most frequent single words in the given text.

    Inputs:
        string_lst: text to tokenize (forwarded to n_gram)

    Returns: list of up to 15 words, most frequent first
    '''
    # n_gram(..., n=1) produces 1-tuples such as ('word',);
    # Counter(iterable) replaces the manual update() loop, and the
    # leftover debug print() calls are removed.
    counts = Counter(self.n_gram(string_lst, False, 1))
    # unpack the 1-tuple key from each (key, count) pair
    return [gram[0] for gram, _count in counts.most_common(15)]
def n_gram(self, wh_page, caps, n):
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
n_gram_lst = []
list_of_words = self.pre_processing(wh_page,caps)
start_value = 0
......@@ -585,7 +502,6 @@ class Article:
start_value +=1
stop_value +=1
return n_gram_lst
<<<<<<< HEAD
def find_common_words(self, string):
......@@ -595,8 +511,6 @@ class Article:
common_words = [w for w in words_lst.most_common(15)]
common_words = [w[0][0] for w in common_words]
return common_words
=======
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
'''
def find_salient_ngrams(self, wh_page, n, case_sensitive, threshold):
......@@ -692,11 +606,3 @@ class Article:
else:
info += '\nNo Sources Cited'
return info
<<<<<<< HEAD
def test():
    # Debug helper: dump the module-level list loaded by
    # inappropriate_words() so it can be inspected manually.
    print(inappropriate_words_lst)
=======
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment