Commit 78506518 by Alex Chang
parents c8408f34 8b5b9395
......@@ -77,25 +77,31 @@ class SearchResults:
'''
self.__query = query
print('query',query)
self.__filters = filters
print('filters',filters)
self.__required_words = self.get_required_words(query)
if self.__required_words != None:
self.__filters['required_words'] = self.__required_words
self.__forbidden_words = self.get_forbidden_words(query)
if self.__forbidden_words != None:
self.__filters['forbidden_words'] = self.__forbidden_words
self.__soup = self.get_search_soup(query)
self.__links = [a['href'] for a in self.__soup.find_all(\
'a', class_='result_link')]
self.__results = self.__soup.find_all('a', class_ = 'result_link')
print('num reuslts',len(self.__results))
print(self.__results[0])
self.__unfiltered_outer_articles = [OuterArticle(r) for r in self.__results if \
'Category' not in r.find('div', class_ = 'result_title').text]
print('unfiltered outer aticles',len(self.__unfiltered_outer_articles))
self.__categories = [r for r in self.__results if\
'Category' in r.find('div', class_ = 'result_title')]
self.__outer_articles = [r for r in self.__unfiltered_outer_articles\
if self.passes_outer_filters(r)]
print('num outer articles',len(self.__outer_articles))
self.__unfiltered_articles = [Article(r.url) for r in self.__outer_articles]
print('making articles')
self.articles = [a for a in self.__unfiltered_articles if\
self.passes_all_filters(a)]
......@@ -108,6 +114,7 @@ class SearchResults:
def passes_outer_filters(self, outer_article):
print('checking')
if 'date_updated' in self.__filters:
num, interval = outer_article.date_updated.split()[:2]
num = int(num)
......@@ -160,8 +167,11 @@ class SearchResults:
if self.__filters['child_safe']:
self.__forbidden_words += inappropiate_words
if 'pct_helpful' in self.__filters:
if article.pct_helpful < self.__filters['pct_helpful']:
return False
try:
if article.pct_helpful < self.__filters['pct_helpful']:
return False
except TypeError:
pass
if 'num_voters' in self.__filters:
if article.num_voters < self.__filters['num_voters']:
return False
......@@ -176,6 +186,7 @@ class SearchResults:
def __repr__(self, limit_results = None):
info = 'Results for ' + self.__query + ':\n'
print(len(self.articles))
if limit_results != None:
results = self.articles[:limit_results]
else:
......@@ -219,9 +230,10 @@ class Article:
self.languages = self.find_languages(self.__soup)
self.view = self.__repr__()
self.words = self.scrape_words(self.url, self.__soup)
'''
self.salient = self.find_salient_ngrams(self.words,\
1, False, 0.5)
'''
def get_page_soup(self, url):
response = requests.get(url)
......@@ -403,7 +415,7 @@ class Article:
links.append(filtered_link)
return links
'''
def find_salient_ngrams(self, wh_page, n, case_sensitive, threshold):
......@@ -476,7 +488,7 @@ class Article:
return idf
def find_salient(self, docs, threshold):
'''
"""
Compute the salient words for each document. A word is salient if
its tf-idf score is strictly above a given threshold.
......@@ -485,7 +497,7 @@ class Article:
threshold: float
Returns: list of sets of salient words
'''
"""
salient_lst = []
for tok_lst in docs:
......@@ -496,8 +508,8 @@ class Article:
new_set.add(a_token)
salient_lst.append(new_set)
return salient_lst
'''
def __repr__(self):
print('repr')
info = self.title
......
......@@ -2,73 +2,77 @@
import sys
import PySimpleGUI as sg
import SearchResults
def callback_function1():
    """Announce that callback 1 ran.

    Shows a single PySimpleGUI popup and then writes one trace line to
    stdout, mirroring what ``callback_function2`` does. The popup/print pair
    used to appear twice in the body, which made the popup show up twice per
    invocation.
    """
    sg.popup('In Callback Function 1')
    print('In the callback function 1')
def callback_function2():
    """Report that callback 2 was invoked.

    A popup is displayed first; once it returns, a trace line is written
    to stdout.
    """
    popup_text = 'In Callback Function 2'
    sg.popup(popup_text)
    print('In the callback function 2')
# Widget grid for the search window: one inner list per visual row.
# Elements created with ``key=...`` are reported under that key in the
# ``values`` dict returned by ``window.read()``.
layout = [
    # Title.
    [sg.Text('Wikihow Tool')],
    # Query box and the button that triggers a search.
    [
        sg.Text('Search Here'),
        sg.Input(key='Query'),
        sg.Button('Search'),
    ],
    # Heading for the filter controls below.
    [sg.Text('Filters', font=('Helvetica', 15, 'underline'))],
    # Child-safety toggle and maximum article age (spinner offers 0..52).
    [
        sg.Text('ChildSafety'),
        sg.CBox('On', key='child_safe'),
        sg.Spin(list(range(53)), initial_value=0, key="date_updated"),
        sg.Text('Max time (months) since last update'),
    ],
    # Verification checkboxes.
    [
        sg.Text('Expert Co-authored'),
        sg.CBox('', key='e_verif'),
        sg.Text('Quality Tested'),
        sg.CBox('', key='q_verif'),
    ],
    # Helpfulness percentage slider, 0..100 with 50 preselected.
    [
        sg.Text('Minimum % viewers who found it helpful'),
        sg.Slider(
            range=(0, 100),
            default_value=50,
            size=(20, 15),
            orientation='horizontal',
            font=('Helvetica', 12),
            key='pct_helpful',
        ),
    ],
    # Voter count spinner (0..99).
    [
        sg.Spin(list(range(100)), initial_value=0, key="num_voters"),
        sg.Text('Number of people who voted on helpfulness'),
    ],
    # Category checkboxes.
    [
        sg.Text('Categories'),
        sg.CBox('House & Garden', key='House'),
        sg.CBox('Computers & Internet', key='CP & I'),
        sg.CBox('Art and Literature', key='Arts'),
        sg.CBox('Social and Organizational Skills', key='Social keys'),
    ],
    # Cited-sources spinner (0..99).
    [
        sg.Spin(list(range(100)), initial_value=0, key="num_sources"),
        sg.Text('Number of sources cited'),
    ],
    # View-count spinner (0..99).
    [
        sg.Spin(list(range(100)), initial_value=0, key="num_views"),
        sg.Text('Views'),
    ],
    # Language drop-down; note that it is created without an explicit key.
    [
        sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']),
        sg.Text('Language of the Article'),
    ],
    # Status bar at the bottom of the window.
    [
        sg.StatusBar(
            text=f'| Filters On: {None}',
            size=(30, 1),
            pad=(0, 0),
            text_color='black',
            background_color='white',
            relief=sg.RELIEF_FLAT,
            justification='left',
            visible=True,
            key='status_bar',
        ),
    ],
]
sg.popup('In Callback Function 2')
print('In the callback function 2')
def interact(filters=None):
    """Run the main search window until the user closes it.

    Each time the 'Search' button fires, the window's current ``values``
    dict (which includes the 'Query' entry) is echoed to stdout and handed
    to ``display_results``.

    Args:
        filters: Kept for backward compatibility. The incoming value is not
            read; it is replaced by the window's ``values`` on every search.
            Defaults to ``None`` rather than a shared mutable ``{}``.
    """
    # NOTE(review): ``layout`` is the module-level list. PySimpleGUI is
    # documented to reject a layout that was already attached to a window, so
    # calling this function twice in one process likely fails - confirm.
    window = sg.Window('WikiHow Search and Filters', layout)
    try:
        while True:
            event, values = window.read()
            if event == sg.WIN_CLOSED:
                break
            if event == 'Search':
                filters = values
                print(event, values)
                display_results(filters)
    finally:
        # Release the window even when display_results raises.
        window.close()
def display_results(filters):
    """Run a search for the submitted form values and print the outcome.

    Args:
        filters: Mapping of form values; must contain a 'Query' entry. The
            whole mapping is also forwarded to the search as its filters.
    """
    search_text = filters['Query']
    found = SearchResults.SearchResults(search_text, filters)
    print(found)
    #code to add window here
'''
sg.theme('DarkAmber') # Add a little color to your windows
# All the stuff inside your window. This is the PSG magic code compactor...
layout = [ [sg.Text('Some text on Row 1')],
[sg.Text('Enter something on Row 2'), sg.InputText()],
[sg.OK(), sg.Cancel()]]
# Create the Window
window = sg.Window('Window Title', layout)
# Event Loop to process "events"
while True:
event, values = window.read()
if event in (sg.WIN_CLOSED, 'Cancel'):
break
print('you entered', values[0])
layout = [[sg.Text('Wikihow Tool')],
[sg.Text('Search Here'), sg.Input(key = 'Query'), sg.Button('Search')],
[sg.Text('Filters', font = ('Helvetica', 15, 'underline'))],
[sg.Text('ChildSafety'), sg.CBox('On', key = 'child_safe'), sg.Spin([i for i in range(53)], initial_value=0, key = "date_updated"), sg.Text('Max time (months) since last update')],
[sg.Text('Expert Co-authored'), sg.CBox('', key = 'e_verif'), sg.Text('Quality Tested'), sg.CBox('', key = 'q_verif')],
[sg.Text('Minimum % viewers who found it helpful'), sg.Slider(range=(0,100), default_value=50, size=(20,15), orientation='horizontal', font=('Helvetica', 12), key = 'pct_helpful')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_voters"), sg.Text('Number of people who voted on helpfulness')],
[sg.Text('Categories'), sg.CBox('House & Garden', key = 'House'), sg.CBox('Computers & Internet', key = 'CP & I'), sg.CBox('Art and Literature', key = 'Arts'), sg.CBox('Social and Organizational Skills', key = 'Social keys')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_sources"), sg.Text('Number of sources cited')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_views"), sg.Text('Views')],
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']), sg.Text('Language of the Article')],
[sg.StatusBar( text=f'| Filters On: {None}', size=(30,1), pad=(0,0), text_color='black', background_color='white', relief=sg.RELIEF_FLAT, justification='left', visible=True, key='status_bar' )]]
#layout_x = layout
print('im interacting')
window = sg.Window('WikiHow Search and Filters', layout)
results_pg_active = False
while True:
print(results_pg_active)
event, values = window.read()
if event == sg.WIN_CLOSED:
break
elif not results_pg_active and event == 'Search':
results_pg_active = True
filters = values
wh_query = values['Query']
print(event, values)
display_results(filters, results_pg_active)
results_pg_active = False
window.close()
def display_results(filters, results_pg_active):
print(results_pg_active)
query = filters['Query']
input_filters = dict(filters)
if filters['date_updated'] == 0:
del input_filters['date_updated']
results = SearchResults.SearchResults(query, input_filters)
print(results)
layout2 = [[sg.Text('Results')],
[sg.Text(str(filters))]]
win2 = sg.Window('Window 2', layout2)
if results_pg_active:
ev2, vals2 = win2.read(timeout=100)
if ev2 == sg.WIN_CLOSED or ev2 == 'Exit':
results_pg_active = False
win2.close()
#code to add window here
window.close()
'''
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment