Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Ethan Mertz
/
CS-123-Final
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Pipelines
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
f411ae04
authored
Jun 01, 2018
by
Ethan Mertz
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed top text and util to work
parent
12efb152
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
18 deletions
Python/toptext.py
util.py
Python/toptext.py
View file @
f411ae04
...
@@ -2,32 +2,33 @@ from mrjob.job import MRJob
...
@@ -2,32 +2,33 @@ from mrjob.job import MRJob
import
util
import
util
import
heapq
import
heapq
import
jellyfish
import
jellyfish
import
json
CAPACITY
=
100
CAPACITY
=
100
class
MRWordFreqCount
(
MRJob
):
class
MRWordFreqCount
(
MRJob
):
def mapper_init(self):
    """Load the variable and function score tables used by ``mapper``.

    Reads ``var_map.json`` and ``func_map.json`` (paths relative to the
    current working directory) and stores the parsed JSON on
    ``self.varible_score`` and ``self.function_score``.

    Raises:
        OSError: if either file cannot be opened.
        ValueError: if either file does not contain valid JSON.
    """
    # json.load parses an open file object; the json module has no `reads`,
    # so the previous call failed with AttributeError before any mapping ran.
    with open("var_map.json") as var:
        # Attribute spelling is kept as-is because mapper() reads this name.
        self.varible_score = json.load(var)
    with open("func_map.json") as func:
        self.function_score = json.load(func)
def
mapper
(
self
,
_
,
line
):
def
mapper
(
self
,
_
,
line
):
file1
,
file2
,
total
=
line
.
split
(
","
)
file1
,
file2
,
total
=
line
.
split
(
","
)
file1name
=
"pyfile"
+
file1
+
".txt"
file2name
=
"pyfile"
+
file2
+
".txt"
file2name
=
"pyfile"
+
file2
+
".txt"
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
for
file_number
in
{
file1
,
file2
}:
compare1
=
"pyfile"
+
str
(
i
)
+
".txt"
filename
=
"pyfile"
+
file_number
+
".txt"
params
=
util
.
funcsim
(
file1name
,
compare1
)
file_text
=
filename
.
read
()
text1
=
params
[
4
]
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
text2
=
params
[
5
]
comparison_file
=
"pyfile"
+
i
+
".txt"
simscore
=
jellyfish
.
jaro_distance
(
text1
,
text2
)
comparison_text
=
comparison_file
.
read
()
yield
file1name
,
simscore
ts
=
jellyfish
.
jaro_winkler
(
filename_text
,
comparison_text
)
yield
compare1
,
simscore
for
text
in
[(
filename
,
file_text
),
(
comparison_file
,
comparison_text
)]:
if
file1
!=
file2
:
vs
=
util
.
get_variable_score
(
self
.
varible_score
,
text
[
1
])
for
i
in
range
(
int
(
file2
)
+
1
,
int
(
total
)):
fs
=
util
.
get_function_score
(
self
.
function_score
,
text
[
1
])
compare1
=
"pyfile"
+
str
(
i
)
+
".txt"
yield
text
[
0
],
ts
+
vs
+
fs
params
=
util
.
funcsim
(
file1name
,
compare1
)
text1
=
params
[
4
]
text2
=
params
[
5
]
simscore
=
jellyfish
.
jaro_distance
(
text1
,
text2
)
yield
file1name
,
simscore
yield
compare1
,
simscore
def
combiner
(
self
,
word
,
counts
):
def
combiner
(
self
,
word
,
counts
):
cs
=
list
(
counts
)
cs
=
list
(
counts
)
...
...
util.py
View file @
f411ae04
import
re
import
re
import
jellyfish
import
jellyfish
# Pattern for the left-hand side of an assignment. Group 1 captures the
# target text (a name followed by any word characters, commas or spaces)
# that precedes a single "="; the trailing [^=] keeps "==" from matching.
# Written as a raw string so that \s and \w reach the regex engine without
# relying on Python's deprecated handling of unknown string escapes.
REG_V = r"[\s]*([a-zA-Z_][\w, ]*)[\s]*=[^=]"
# Pattern for a function header of the form "def <name>(".
REG_F = r"def [a-zA-Z_][\w]*\("
# Score used for any name that has no entry in the score dictionary.
DEFAULT_RETURN = 1
def
funcsim
(
file1name
,
file2name
):
def
funcsim
(
file1name
,
file2name
):
count
=
0
count
=
0
with
open
(
file1name
)
as
f1
:
with
open
(
file1name
)
as
f1
:
...
@@ -12,6 +16,24 @@ def funcsim(file1name,file2name):
...
@@ -12,6 +16,24 @@ def funcsim(file1name,file2name):
return
(
name1
,
name2
,
params1
,
params2
,
text1
,
text2
)
return
(
name1
,
name2
,
params1
,
params2
,
text1
,
text2
)
def get_variable_score(d, text):
    """Return the mean score of the assignment targets found in *text*.

    Targets are extracted with ``REG_V``. Each one is looked up in *d*;
    targets missing from *d* count as ``DEFAULT_RETURN``.

    Args:
        d: Mapping from extracted name to a numeric score.
        text: Source text to scan.

    Returns:
        The average score over all matches, or 0 when *text* contains no
        match (this case used to raise ZeroDivisionError).
    """
    variables = re.findall(REG_V, text)
    if not variables:
        # Nothing to average; avoid dividing by zero.
        return 0
    total = sum(d.get(variable, DEFAULT_RETURN) for variable in variables)
    return total / len(variables)
def get_function_score(d, text):
    """Return the mean score of the function headers found in *text*.

    Headers are extracted with ``REG_F``. Each match is looked up in *d*;
    matches missing from *d* count as ``DEFAULT_RETURN``.

    Args:
        d: Mapping from extracted match to a numeric score.
        text: Source text to scan.

    Returns:
        The average score over all matches, or 0 when *text* contains no
        match (this case used to raise ZeroDivisionError).
    """
    functions = re.findall(REG_F, text)
    if not functions:
        # Nothing to average; avoid dividing by zero.
        return 0
    # The loop variable previously shadowed the list (`for functions in
    # functions`) while the body read an undefined `function`, which raised
    # NameError on the first match.
    total = sum(d.get(function, DEFAULT_RETURN) for function in functions)
    return total / len(functions)
'''
'''
Functions for C
Functions for C
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment