Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Ethan Mertz
/
CS-123-Final
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Pipelines
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3df6560b
authored
Jun 03, 2018
by
Anselm Jia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
subsetting
parent
d4df4096
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
44 deletions
Python/pyfuncsplit.py
Python/topfunctions.py
util.py
Python/pyfuncsplit.py
View file @
3df6560b
...
...
@@ -8,7 +8,7 @@ def go():
current_func
=
""
count
=
0
for
line
in
f
:
if
count
<
100000
0
:
if
count
<
2
0
:
if
not
func
:
result
=
re
.
findall
(
"^([
\t
]*)def (
\
w+)
\
(([
\
w, ]*)
\
):"
,
line
)
if
result
:
...
...
Python/topfunctions.py
View file @
3df6560b
...
...
@@ -9,10 +9,10 @@ class TopFunctions(MRJob):
def
mapper
(
self
,
_
,
line
):
file1
,
file2
,
total
=
line
.
split
(
","
)
file1name
=
"pyfile"
+
file1
+
".txt"
file2name
=
"pyfile"
+
file2
+
".txt"
file1name
=
"pyf
unctions/pyf
ile"
+
file1
+
".txt"
file2name
=
"pyf
unctions/pyf
ile"
+
file2
+
".txt"
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
compare1
=
"pyfile"
+
str
(
i
)
+
".txt"
compare1
=
"pyf
unctions/pyf
ile"
+
str
(
i
)
+
".txt"
params
=
util
.
funcsim
(
file1name
,
compare1
)
name1
=
params
[
0
]
name2
=
params
[
1
]
...
...
@@ -21,7 +21,7 @@ class TopFunctions(MRJob):
yield
name2
,
(
1
,
total
,
simscore
)
if
file1
!=
file2
:
for
i
in
range
(
int
(
file2
)
+
1
,
int
(
total
)):
compare1
=
"pyfile"
+
str
(
i
)
+
".txt"
compare1
=
"pyf
unctions/pyf
ile"
+
str
(
i
)
+
".txt"
params
=
util
.
funcsim
(
file1name
,
compare1
)
name1
=
params
[
0
]
name2
=
params
[
1
]
...
...
util.py
View file @
3df6560b
...
...
@@ -51,43 +51,43 @@ VAR_EX = ("(?:(?:auto\s*|const\s*|unsigned\s*|"
"double
\
s*|_Bool
\
s*|complex
\
s*)+)(?:
\
s+
\
*?
\
*?
\
s*)("
"[a-zA-Z_][a-zA-Z0-9_]*)
\
s*[
\
[;,=)]"
)
def file_metrics(fname):
    """Collect simple size metrics for the file at ``fname``.

    Args:
        fname: Path of the file to scan.

    Returns:
        A ``(length, func, var)`` tuple: the number of lines in the file,
        the total number of ``FUNC_EX`` matches and the total number of
        ``VAR_EX`` matches, both counted line by line.
    """
    with open(fname) as handle:
        lines = handle.readlines()

    # Patterns are applied to each line separately, so a construct that
    # spans several lines is not counted.
    var_total = sum(len(re.findall(VAR_EX, text)) for text in lines)
    func_total = sum(len(re.findall(FUNC_EX, text)) for text in lines)
    return len(lines), func_total, var_total
def file_metrics(fname):
    """Collect simple size metrics for the file at ``fname``.

    Args:
        fname: Path of the file to scan.

    Returns:
        A ``(length, func, var)`` tuple: the number of lines in the file,
        the total number of ``FUNC_EX`` matches and the total number of
        ``VAR_EX`` matches, both counted line by line.
    """
    with open(fname) as handle:
        lines = handle.readlines()

    # Patterns are applied to each line separately, so a construct that
    # spans several lines is not counted.
    var_total = sum(len(re.findall(VAR_EX, text)) for text in lines)
    func_total = sum(len(re.findall(FUNC_EX, text)) for text in lines)
    return len(lines), func_total, var_total
def sim(file1, file2):
    """Score how similar two source files are.

    The score is the sum of four terms: ``jellyfish.jaro_distance`` of the
    two files' full text, plus the min/max ratio of their line counts, of
    their ``FUNC_EX`` match counts and of their ``VAR_EX`` match counts as
    reported by ``file_metrics``.  A ratio whose larger count is zero
    contributes 0.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        The summed similarity score.

    Side effects:
        Prints both paths and the Jaro value to stdout.
    """

    def ratio(count1, count2):
        # Smaller count over larger count; 0 when both counts are zero so
        # the division is never attempted with a zero denominator.
        larger = max(count1, count2)
        if larger != 0:
            return min(count1, count2) / larger
        return 0

    print(file1 + ' ' + file2)
    len_f1, num_func1, num_var1 = file_metrics(file1)
    len_f2, num_func2, num_var2 = file_metrics(file2)

    with open(file1) as f1, open(file2) as f2:
        # NOTE(review): the variable is named ``jw`` but the call is plain
        # Jaro, not Jaro-Winkler -- confirm which one is intended.
        jw = jellyfish.jaro_distance(f1.read(), f2.read())
    print(jw)

    r_func = ratio(num_func1, num_func2)
    r_var = ratio(num_var1, num_var2)
    # The previous code assigned r_var (not r_len) in the zero-length
    # branch, leaving r_len unbound when both files were empty.
    r_len = ratio(len_f1, len_f2)
    return jw + r_func + r_var + r_len
def sim(file1, file2):
    """Score how similar two source files are.

    The score is the sum of four terms: ``jellyfish.jaro_distance`` of the
    two files' full text, plus the min/max ratio of their line counts, of
    their ``FUNC_EX`` match counts and of their ``VAR_EX`` match counts as
    reported by ``file_metrics``.  A ratio whose larger count is zero
    contributes 0.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        The summed similarity score.

    Side effects:
        Prints both paths and the Jaro value to stdout.
    """

    def ratio(count1, count2):
        # Smaller count over larger count; 0 when both counts are zero so
        # the division is never attempted with a zero denominator.
        larger = max(count1, count2)
        if larger != 0:
            return min(count1, count2) / larger
        return 0

    print(file1 + ' ' + file2)
    len_f1, num_func1, num_var1 = file_metrics(file1)
    len_f2, num_func2, num_var2 = file_metrics(file2)

    with open(file1) as f1, open(file2) as f2:
        # NOTE(review): the variable is named ``jw`` but the call is plain
        # Jaro, not Jaro-Winkler -- confirm which one is intended.
        jw = jellyfish.jaro_distance(f1.read(), f2.read())
    print(jw)

    r_func = ratio(num_func1, num_func2)
    r_var = ratio(num_var1, num_var2)
    # The previous code assigned r_var (not r_len) in the zero-length
    # branch, leaving r_len unbound when both files were empty.
    r_len = ratio(len_f1, len_f2)
    return jw + r_func + r_var + r_len
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment