Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Ethan Mertz
/
CS-123-Final
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Pipelines
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
8d16ddb7
authored
May 31, 2018
by
Anselm Jia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
variables
parent
4c461b2d
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
47 additions
and
38 deletions
Python/collectvariables.py
Python/toptext.py
Python/topvariables.py
util.py
Python/collectvariables.py
View file @
8d16ddb7
...
...
@@ -2,7 +2,7 @@ import re
import
jellyfish
import
glob
var
=
"[
\
s]*([
\
w, ]+
)[
\
s]*=[^=]"
var
=
"[
\
s]*([
a-zA-Z_][
\
w, ]*
)[
\
s]*=[^=]"
def
get_unique_vars
(
filename
):
unique_vs
=
set
()
...
...
@@ -21,7 +21,8 @@ def main():
print
(
"working on "
+
filename
)
s
=
get_unique_vars
(
filename
)
with
open
(
"pyvariables/varfile"
+
str
(
count
)
+
".txt"
,
"w"
)
as
f
:
f
.
write
(
repr
(
s
))
for
var
in
s
:
f
.
write
(
var
+
"
\n
"
)
count
+=
1
...
...
Python/toptext.py
View file @
8d16ddb7
...
...
@@ -16,7 +16,7 @@ class MRWordFreqCount(MRJob):
params
=
util
.
funcsim
(
file1name
,
compare1
)
text1
=
params
[
4
]
text2
=
params
[
5
]
simscore
=
jellyfish
.
jaro_distance
(
text1
,
text2
)
simscore
=
jellyfish
.
jaro_distance
(
text1
,
text2
)
yield
file1name
,
simscore
yield
compare1
,
simscore
if
file1
!=
file2
:
...
...
Python/topvariables.py
View file @
8d16ddb7
...
...
@@ -12,30 +12,38 @@ class TopVariables(MRJob):
def
mapper
(
self
,
_
,
line
):
file1
,
file2
,
total
=
line
.
split
(
","
)
file1name
=
"pyfile"
+
file1
+
".txt"
file2name
=
"pyfile"
+
file2
+
".txt"
f1
=
file1
.
readlines
()
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
compare
=
"pyfile"
+
str
(
i
)
+
".txt"
compare1
=
compare
.
readlines
()
for
variable
in
f1
:
for
comp
in
compare1
:
simscore
=
-
jellyfish
.
levenshtein_distance
(
variable
,
comp
)
yield
variable
,
(
1
,
simscore
)
yield
comp
,
(
1
,
simscore
)
file1name
=
"varfile"
+
file1
+
".txt"
file2name
=
"varfile"
+
file2
+
".txt"
with
open
(
file1name
)
as
f1
:
f1vars
=
f1
.
readlines
()
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
compare
=
"varfile"
+
str
(
i
)
+
".txt"
with
open
(
compare
)
as
compare1
:
compare1vars
=
compare1
.
readlines
()
for
variable
in
f1vars
:
variable
=
variable
.
strip
()
for
comp
in
compare1vars
:
comp
=
comp
.
strip
()
simscore
=
-
jellyfish
.
levenshtein_distance
(
variable
,
comp
)
yield
variable
,
(
1
,
simscore
)
yield
comp
,
(
1
,
simscore
)
if
file1
!=
file2
:
f2
=
file2
.
readlines
()
for
i
in
range
(
int
(
file2
)
+
1
,
int
(
total
)):
compare
=
"pyfile"
+
str
(
i
)
+
".txt"
compare2
=
compare
.
readlines
()
for
variable
in
f2
:
for
comp
in
compare2
:
simscore
=
-
jellyfish
.
levenshtein_distance
(
variable
,
comp
)
yield
variable
,
(
1
,
simscore
)
yield
comp
,
(
1
,
simscore
)
yield
None
,
(
len
(
f1
)
+
len
(
f2
))
with
open
(
file2name
)
as
f2
:
f2vars
=
f2
.
readlines
()
for
i
in
range
(
int
(
file2
)
+
1
,
int
(
total
)):
compare
=
"varfile"
+
str
(
i
)
+
".txt"
with
open
(
compare
)
as
compare2
:
compare2vars
=
compare2
.
readlines
()
for
variable
in
f2
:
variable
=
variable
.
strip
()
for
comp
in
compare2vars
:
comp
=
comp
.
strip
()
simscore
=
-
jellyfish
.
levenshtein_distance
(
variable
,
comp
)
yield
variable
,
(
1
,
simscore
)
yield
comp
,
(
1
,
simscore
)
yield
None
,
(
len
(
f1vars
)
+
len
(
f2vars
))
else
:
yield
None
,
len
(
f1
)
yield
None
,
len
(
f1
vars
)
def
combiner
(
self
,
name
,
scores
):
...
...
@@ -53,7 +61,7 @@ class TopVariables(MRJob):
def
reducer_init
(
self
):
self
.
h
=
[(
-
float
(
"inf"
),
""
)]
self
.
total
self
.
total
=
None
heapq
.
heapify
(
self
.
h
)
def
reducer
(
self
,
name
,
scores
):
...
...
util.py
View file @
8d16ddb7
...
...
@@ -17,6 +17,18 @@ def funcsim(file1name,file2name):
Functions for C
'''
FUNC_EX
=
(
"(?:(?:auto
\
s*|const
\
s*|unsigned
\
s*|signed
\
s*|"
"register
\
s*|volatile
\
s*|static
\
s*|void
\
s*|short"
"
\
s*|long
\
s*|char
\
s*|int
\
s*|float
\
s*|double
\
s*|"
"_Bool
\
s*|complex
\
s*)+)(?:
\
s+
\
*?
\
*?
\
s*)([a-zA-Z"
"_][a-zA-Z0-9_]*) *
\
("
)
VAR_EX
=
(
"(?:(?:auto
\
s*|const
\
s*|unsigned
\
s*|"
"signed
\
s*|register
\
s*|volatile
\
s*|static
\
s*|"
"void
\
s*|short
\
s*|long
\
s*|char
\
s*|int
\
s*|float
\
s*|"
"double
\
s*|_Bool
\
s*|complex
\
s*)+)(?:
\
s+
\
*?
\
*?
\
s*)("
"[a-zA-Z_][a-zA-Z0-9_]*)
\
s*[
\
[;,=)]"
)
def
file_metrics
(
fname
):
with
open
(
fname
)
as
f
:
f_l
=
f
.
readlines
()
...
...
@@ -30,18 +42,6 @@ Functions for C
#print(' '.join([str(length),str(func),str(var)]))
return
length
,
func
,
var
FUNC_EX
=
(
"(?:(?:auto
\
s*|const
\
s*|unsigned
\
s*|signed
\
s*|"
"register
\
s*|volatile
\
s*|static
\
s*|void
\
s*|short"
"
\
s*|long
\
s*|char
\
s*|int
\
s*|float
\
s*|double
\
s*|"
"_Bool
\
s*|complex
\
s*)+)(?:
\
s+
\
*?
\
*?
\
s*)([a-zA-Z"
"_][a-zA-Z0-9_]*) *
\
("
)
VAR_EX
=
(
"(?:(?:auto
\
s*|const
\
s*|unsigned
\
s*|"
"signed
\
s*|register
\
s*|volatile
\
s*|static
\
s*|"
"void
\
s*|short
\
s*|long
\
s*|char
\
s*|int
\
s*|float
\
s*|"
"double
\
s*|_Bool
\
s*|complex
\
s*)+)(?:
\
s+
\
*?
\
*?
\
s*)("
"[a-zA-Z_][a-zA-Z0-9_]*)
\
s*[
\
[;,=)]"
)
def
sim
(
file1
,
file2
):
print
(
file1
+
' '
+
file2
)
len_f1
,
num_func1
,
num_var1
=
file_metrics
(
file1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment