Commit c72c2a34 by Michelle Awh

got thing working

parent f77600a7
......@@ -64,6 +64,7 @@ class SearchResults:
Returns:
a SearchResults object
'''
print(filters)
self.__filters = filters
self.__required_words = []
self.__required_words += self.get_required_words(query)
......@@ -115,21 +116,11 @@ class SearchResults:
months = num / 720
if months > self.__filters['date_updated']:
return False
if self.__filters['q_verif']:
try:
if 'Quality Tested' not in outer_article.verif:
return full_expanse
except AttributeError:
pass
if self.__filters['e_verif']:
try:
if 'Expert Co-Authored' not in outer_article.verif:
return False
except AttributeError:
pass
if 'num_views' in self.__filters:
if self.__filters['num_views'] > outer_article.views:
return False
if 'Main-Page' in outer_article.url:
return False
return True
......@@ -151,11 +142,12 @@ class SearchResults:
else:
forbidden_words += match.group().split()
forbidden_words[0] = forbidden_words[0][1:]
print('forbidden words',forbidden_words)
return forbidden_words
def passes_all_filters(self, article):
if 'Main-Page' in article.url:
return False
if 'child_safe' in self.__filters:
if self.__filters['child_safe']:
self.__forbidden_words += inappropriate_words_lst
......@@ -169,7 +161,7 @@ class SearchResults:
if int(article.num_voters) < self.__filters['num_voters']:
return False
if 'num_sources' in self.__filters:
if len(article.sources) < self.__filters['num_sources']:
if len(article.sources) < int(self.__filters['num_sources']):
return False
if 'num_views' in self.__filters:
if article.num_views < self.__filters['num_views']:
......@@ -179,9 +171,6 @@ class SearchResults:
W = w[0].upper() + w[1:]
wl = w[0].lower() + w[1:]
if W not in article.words and wl not in article.words:
print(article.title)
print(W,wl)
print('not there')
return False
if len(self.__forbidden_words) != 0:
for w in self.__forbidden_words:
......@@ -189,6 +178,21 @@ class SearchResults:
wl = w[0].lower() + w[1:]
if W in article.words or wl in article.words:
return False
if self.__filters['q_verif']:
try:
if 'Quality Tested' not in article.verif:
return False
except AttributeError:
pass
if self.__filters['e_verif']:
try:
if 'Expert Co-authored' not in article.verif:
return False
except AttributeError:
pass
if self.__filters['language'] != '':
if self.__filters['language'] not in article.languages:
return False
return True
......@@ -233,20 +237,17 @@ class Article:
self.get_sp_text_data(self.__soup)
self.url = url
self.num_voters, self.pct_helpful = self.get_helpful(self.__soup)
print(self.title,'votes',self.num_voters,'percent',self.pct_helpful)
self.rating = self.find_rating(self.__soup)
self.languages = self.find_languages(self.__soup)
self.verif = self.expert_data(self.__soup)
self.view = self.__repr__()
self.words = self.scrape_words(self.url, self.__soup)
'''
self.salient = self.find_salient_ngrams(self.words,\
1, False, 0.5)
'''
self.tags = self.find_common_words(self.words)
self.link_family = self.all_peripheral_links(self.__soup, self.url)
self.related_articles = []
self.related_categories = []
def get_page_soup(self, url, timeout=30):
    """
    Download a page and parse it into a soup object.

    Input: url - address of the page to fetch
           timeout - seconds to wait on the server before requests
               gives up and raises its Timeout exception; pass None
               to wait indefinitely (the previous behaviour)
    Returns: BeautifulSoup object built from the response body using
        the 'html.parser' parser
    """
    # Without a timeout, requests.get can block forever on a stalled
    # connection and hang the caller.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.content, 'html.parser')
......@@ -259,10 +260,27 @@ class Article:
divs.find_all('a', class_ = 'external free')]
except AttributeError:
sources = []
print('sources',sources)
return sources
def expert_data(self, soup):
    """
    Report which verification label, if any, an article page carries.

    Looks up the first div with class "sp_coauthor_label" and inspects
    its text.

    Input: soup object
    Returns: String - "Expert Co-authored" when the label text contains
        "Co-authored"; otherwise "Quality Tested" when it contains
        "Tested"; otherwise '' (also when the page has no label div).
        Only one label is ever reported; "Co-authored" takes priority.
    """
    label = soup.find("div", class_="sp_coauthor_label")
    # find() gives back None when nothing matches; compare by identity
    # rather than relying on the tag's own equality operator.
    if label is None:
        return ''
    if "Co-authored" in label.text:
        return "Expert Co-authored"
    if "Tested" in label.text:
        return "Quality Tested"
    return ''
def get_author(self, soup):
div = soup.find('a', class_ = "sp_namelink")
try:
......@@ -277,9 +295,14 @@ class Article:
lst = []
span = soup.find_all('span', class_='sp_text_data')
for x in span:
print(x)
if x.text not in lst:
lst.append(x.text)
lst[-1] = int(''.join(lst[-1].split(',')))
print(lst)
try:
lst[-1] = int(''.join(lst[-1].split(',')))
except IndexError:
return ['','','']
return lst
......@@ -350,6 +373,7 @@ class Article:
Input: soup object
Returns: String containing the text on the article
"""
print(url)
title = soup.find("h1", id = "section_0").text
description = soup.find("div", class_ = "mf-section-0").text
steps = soup.find_all("div", class_ = "step")
......@@ -515,10 +539,7 @@ class Article:
def __repr__(self):
print('repr')
info = self.title
if self.url != None:
info += '\n' + self.url
info = ''
if self.author != None:
info += '\nMain Author: ' + self.author
if self.author_bio != None:
......@@ -526,7 +547,12 @@ class Article:
else:
info += '\nNo listed main author'
info += '\nCoauthored by ' + self.num_coauthors + ' additional authors'
info += '\n Viewed by ' +str(self.num_views)
if self.verif != '':
info += '\nVerification: '
verifs = [v for v in ('Expert Co-authored', 'Quality Tested')\
if v in self.verif]
info += ', \n'.join(verifs)
info += '\nViewed by ' + str(self.num_views)
if self.num_voters != 0:
info += '\n' + str(self.pct_helpful) + '% of ' + str(self.num_voters)
info += ' voters found this article helpful'
......@@ -535,10 +561,8 @@ class Article:
info += '\nLast Updated on ' + self.date_updated
info += '\nAlso available in '
info += ', '.join(self.languages)
if self.sources != None:
info += '\nSources cited: \n'
info += '\nSources Cited: ' + ', \n'.join(self.sources)
else:
if self.sources == None:
info += '\nNo Sources Cited'
return info
\ No newline at end of file
......@@ -2,41 +2,16 @@
import sys
import PySimpleGUI as sg
import SearchResults
def callback_function1():
    """Show a popup notice, then print a matching trace line to stdout."""
    popup_text = 'In Callback Function 1'
    trace_text = 'In the callback function 1'
    sg.popup(popup_text)
    print(trace_text)
# Shows a popup notice, then prints a matching trace line to stdout.
def callback_function2():
# NOTE(review): indentation is lost in this view. If this import is local
# to the function, display_results (which calls webbrowser.open) will not
# see it -- confirm that webbrowser is imported at module level.
import webbrowser
sg.popup('In Callback Function 2')
print('In the callback function 2')
'''
layout = [[sg.Text('Wikihow Tool')],
[sg.Text('Search Here'), sg.Input(key = 'Query'), sg.Button('Search')],
[sg.Text('Filters', font = ('Helvetica', 15, 'underline'))],
[sg.Text('ChildSafety'), sg.CBox('On', key = 'child_safe'), sg.Spin([i for i in range(53)], initial_value=0, key = "date_updated"), sg.Text('Max time (months) since last update')],
[sg.Text('Expert Co-authored'), sg.CBox('', key = 'e_verif'), sg.Text('Quality Tested'), sg.CBox('', key = 'q_verif')],
[sg.Text('Minimum % viewers who found it helpful'), sg.Slider(range=(0,100), default_value=50, size=(20,15), orientation='horizontal', font=('Helvetica', 12), key = 'pct_helpful')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_voters"), sg.Text('Number of people who voted on helpfulness')],
[sg.Text('Categories'), sg.CBox('House & Garden', key = 'House'), sg.CBox('Computers & Internet', key = 'CP & I'), sg.CBox('Art and Literature', key = 'Arts'), sg.CBox('Social and Organizational Skills', key = 'Social keys')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_sources"), sg.Text('Number of sources cited')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_views"), sg.Text('Views')],
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']), sg.Text('Language of the Article')],
[sg.StatusBar( text=f'| Filters On: {None}', size=(30,1), pad=(0,0), text_color='black', background_color='white', relief=sg.RELIEF_FLAT, justification='left', visible=True, key='status_bar' )]]
'''
def interact(filters = {}):
'''
'''
layout = [[sg.Text('Wikihow Tool')],
[sg.Text('Search Here'), sg.Input(key = 'Query'), sg.Button('Search')],
[sg.Text('Filters', font = ('Helvetica', 15, 'underline'))],
......@@ -46,14 +21,13 @@ def interact(filters = {}):
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_voters"), sg.Text('Number of people who voted on helpfulness')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_sources"), sg.Text('Number of sources cited')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_views"), sg.Text('Views')],
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']), sg.Text('Available in')]]
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی'], key = 'language'), sg.Text('Available in')]]
#layout_x = layout
window = sg.Window('WikiHow Search and Filters', layout)
results_pg_active = False
while True:
print(results_pg_active)
while True:
event, values = window.read()
if event == sg.WIN_CLOSED:
break
......@@ -61,31 +35,37 @@ def interact(filters = {}):
results_pg_active = True
filters = values
wh_query = values['Query']
print(event, values)
display_results(filters, results_pg_active)
results_pg_active = False
window.close()
def display_results(filters, results_pg_active):
print(results_pg_active)
query = filters['Query']
input_filters = dict(filters)
if filters['date_updated'] == 0:
del input_filters['date_updated']
results = SearchResults.SearchResults(query, input_filters)
print(results)
layout2 = [[sg.Text('Results')],
[sg.Text(results)],
[sg.Text(str(filters))]]
layout2 = [[sg.Column(layout2, scrollable=True)]]
win2 = sg.Window('Window 2', layout2)
if results_pg_active:
layout2 = [[sg.Text('Results:\n\n')]]
for a in results.articles:
layout2.append([sg.Text(a.title)])
if a.url != None:
layout2.append([sg.Text(a.url, enable_events=True)])
layout2.append([sg.Text(a.view)])
for s in a.sources:
layout2.append([sg.Text(s, enable_events=True)])
layout2.append([sg.Text('\n\n')])
layout2.append([sg.Text(str(filters))])
layout3 = [[sg.Column(layout2, scrollable=True)]]
win2 = sg.Window('Window 2', layout3)
while results_pg_active:
ev2, vals2 = win2.read(timeout=100)
if ev2 == sg.WIN_CLOSED or ev2 == 'Exit':
results_pg_active = False
win2.close()
elif 'http' in ev2:
webbrowser.open(ev2, new = 2)
win2.close()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment