Commit f0978ef8 by Michelle Awh

hi Chris

parent 68a3a764
......@@ -258,7 +258,8 @@ class Article:
sources = [s['href'] for s in\
divs.find_all('a', class_ = 'external free')]
except AttributeError:
sources = None
sources = []
print('sources',sources)
return sources
......@@ -512,71 +513,6 @@ class Article:
common_words = [w[0][0] for w in common_words]
return common_words
'''
def find_salient_ngrams(self, wh_page, n, case_sensitive, threshold):
    """
    Find the salient n-grams of a WH page.

    Builds the n-grams with ``self.n_gram(wh_page, case_sensitive, n)`` and
    hands them to ``self.find_salient`` together with ``threshold``.

    Inputs:
        wh_page: the page to analyse (passed straight to ``n_gram``)
        n: n-gram size (passed straight to ``n_gram``)
        case_sensitive: case-handling flag (passed straight to ``n_gram``)
        threshold: float, cut-off forwarded to ``find_salient``

    Returns: whatever ``find_salient`` returns for those n-grams
        (presumably a list of sets -- confirm against ``n_gram``'s output)
    """
    return self.find_salient(
        self.n_gram(wh_page, case_sensitive, n),
        threshold,
    )
def calc_tf(self, a_token, tokens):
    """
    Calculate the augmented term frequency of ``a_token`` in ``tokens``.

    The score is ``0.5 + 0.5 * (occurrences of a_token / occurrences of
    the most frequent token)``.

    Inputs:
        a_token: the token to score
        tokens: iterable of hashable tokens making up one document

    Returns: float between 0.5 and 1.0; 0.5 when ``a_token`` does not
        occur in ``tokens`` (including when ``tokens`` is empty)
    """
    from collections import Counter

    counts = Counter(tokens)
    if not counts:
        # Empty document: max() would raise ValueError, and the raw
        # frequency of any token is zero, so return the 0.5 baseline.
        return 0.5
    # Counter yields 0 for an absent token instead of raising KeyError.
    return 0.5 + 0.5 * (counts[a_token] / max(counts.values()))
def count_tokens(self, tokens):
    """
    Count how many times each token occurs.

    Inputs:
        tokens: iterable of hashable tokens

    Returns: dict mapping each distinct token to its number of
        occurrences, in first-seen order; empty dict for empty input
    """
    from collections import Counter

    # Convert back to a plain dict so callers keep getting KeyError (not
    # a silent 0) when they look up a token that was never seen.
    return dict(Counter(tokens))
def calc_idf(self, docs, a_token):
    """
    Calculate the inverse document frequency of ``a_token``.

    The score is ``log(N / n_t)`` where ``N`` is the number of documents
    and ``n_t`` is the number of documents that contain ``a_token``.

    Inputs:
        docs: list of token collections, one per document
        a_token: the token to score

    Returns: float; 0.0 when no document contains ``a_token`` (which
        also covers an empty ``docs``)
    """
    docs_with_t = sum(1 for doc in docs if a_token in doc)
    if docs_with_t == 0:
        # Avoid dividing by zero: a token that appears nowhere carries
        # no weight.
        return 0.0
    return math.log(len(docs) / docs_with_t)
def find_salient(self, docs, threshold):
    """
    Compute the salient words for a given WH page. A word is salient if
    its tf-idf score is strictly above a given threshold.

    The tf-idf score of a token is ``self.calc_tf(token, doc)`` times
    ``self.calc_idf(docs, token)``.

    Inputs:
        docs: list of list of tokens
        threshold: float

    Returns: list of sets of salient words, one set per entry of
        ``docs`` and in the same order
    """
    salient_lst = []
    for tok_lst in docs:
        # Each document gets its own set; scoring each distinct token once
        # is enough because the result is a set anyway.
        salient_lst.append({
            a_token
            for a_token in set(tok_lst)
            if (self.calc_tf(a_token, tok_lst)
                * self.calc_idf(docs, a_token)) > threshold
        })
    return salient_lst
'''
def __repr__(self):
print('repr')
......
......@@ -36,7 +36,6 @@ layout = [[sg.Text('Wikihow Tool')],
'''
def interact(filters = {}):
<<<<<<< HEAD
layout = [[sg.Text('Wikihow Tool')],
[sg.Text('Search Here'), sg.Input(key = 'Query'), sg.Button('Search')],
......@@ -47,8 +46,7 @@ def interact(filters = {}):
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_voters"), sg.Text('Number of people who voted on helpfulness')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_sources"), sg.Text('Number of sources cited')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_views"), sg.Text('Views')],
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']), sg.Text('Language of the Article')],
[sg.StatusBar( text=f'| Filters On: {None}', size=(30,1), pad=(0,0), text_color='black', background_color='white', relief=sg.RELIEF_FLAT, justification='left', visible=True, key='status_bar' )]]
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']), sg.Text('Available in')]]
#layout_x = layout
window = sg.Window('WikiHow Search and Filters', layout)
......@@ -88,61 +86,8 @@ def display_results(filters, results_pg_active):
if ev2 == sg.WIN_CLOSED or ev2 == 'Exit':
results_pg_active = False
win2.close()
#code to add window here
=======
layout = [[sg.Text('Wikihow Tool')],
[sg.Text('Search Here'), sg.Input(key = 'Query'), sg.Button('Search')],
[sg.Text('Filters', font = ('Helvetica', 15, 'underline'))],
[sg.Text('ChildSafety'), sg.CBox('On', key = 'child_safe'), sg.Spin([i for i in range(53)], initial_value=0, key = "date_updated"), sg.Text('Max time (months) since last update')],
[sg.Text('Expert Co-authored'), sg.CBox('', key = 'e_verif'), sg.Text('Quality Tested'), sg.CBox('', key = 'q_verif')],
[sg.Text('Minimum % viewers who found it helpful'), sg.Slider(range=(0,100), default_value=50, size=(20,15), orientation='horizontal', font=('Helvetica', 12), key = 'pct_helpful')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_voters"), sg.Text('Number of people who voted on helpfulness')],
[sg.Text('Categories'), sg.CBox('House & Garden', key = 'House'), sg.CBox('Computers & Internet', key = 'CP & I'), sg.CBox('Art and Literature', key = 'Arts'), sg.CBox('Social and Organizational Skills', key = 'Social keys')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_sources"), sg.Text('Number of sources cited')],
[sg.Spin([i for i in range(100)], initial_value=0, key = "num_views"), sg.Text('Views')],
[sg.Combo(['English', 'Español', 'Português', 'Italiano', 'Français', 'русский язык', 'Deutsch', '简体中文', 'Nederlands', 'Čeština', 'Bahasa Indonesia', '日本語', 'हिंदी', 'ภาษาไทย',' العَرَبِيةُ', 'Tiếng Việt', '한국말', 'Türkçe',' فارسی']), sg.Text('Language of the Article')],
[sg.StatusBar( text=f'| Filters On: {None}', size=(30,1), pad=(0,0), text_color='black', background_color='white', relief=sg.RELIEF_FLAT, justification='left', visible=True, key='status_bar' )]]
#layout_x = layout
print('im interacting')
window = sg.Window('WikiHow Search and Filters', layout)
results_pg_active = False
while True:
print(results_pg_active)
event, values = window.read()
if event == sg.WIN_CLOSED:
break
elif not results_pg_active and event == 'Search':
results_pg_active = True
filters = values
wh_query = values['Query']
print(event, values)
display_results(filters, results_pg_active)
results_pg_active = False
window.close()
# Run a search for filters['Query'] and create a second window for the results.
#
# filters: mapping of the GUI values; must contain the key 'Query'. The whole
#     mapping is also passed to SearchResults.SearchResults and rendered as
#     text in the new window.
# results_pg_active: flag that is printed and that gates the single read of
#     the new window below.
# Returns nothing.
def display_results(filters, results_pg_active):
# Debug output of the flag handed in by the caller.
print(results_pg_active)
query = filters['Query']
results = SearchResults.SearchResults(query, filters)
print(results)
# NOTE(review): the window shows str(filters); `results` is only printed,
# never displayed -- presumably a placeholder, confirm.
layout2 = [[sg.Text('Results')],
[sg.Text(str(filters))]]
win2 = sg.Window('Window 2', layout2)
# NOTE(review): the original indentation is lost in this view, so the exact
# nesting of the lines below is unknown -- confirm against the real file.
if results_pg_active:
# Single read with timeout=100 (milliseconds if `sg` is PySimpleGUI --
# TODO confirm); there is no loop around it.
ev2, vals2 = win2.read(timeout=100)
if ev2 == sg.WIN_CLOSED or ev2 == 'Exit':
# Rebinding the parameter only changes the local name; the caller's
# variable is not affected.
results_pg_active = False
win2.close()
#code to add window here
>>>>>>> d2692ff014493cfae8c975b6a9b16d55f549c9fb
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment