Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Ethan Mertz
/
CS-123-Final
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Pipelines
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
79da2cf5
authored
Jun 04, 2018
by
Anselm Jia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
results
parent
664704fd
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
86 additions
and
11 deletions
Python/topfunctext.py
Python/toptext.py
Results/funcscores.json → Results/func_map.json
Results/var_map.json
variable_scores.txt → Results/variable_scores.txt
util.py
Python/topfunctext.py
0 → 100644
View file @
79da2cf5
from
mrjob.job
import
MRJob
import
util
import
heapq
import
jellyfish
import
json
#Set k of top-k entries to collect.
CAPACITY
=
100
class TopFuncText(MRJob):
    """MRJob that scores files pairwise and keeps the top CAPACITY averages.

    Each input line has the form ``file1,file2,total``. For every distinct
    file number on the line, the mapper compares ``pyfile<N>.txt`` against
    each higher-numbered file below ``total`` and emits a negated combined
    score for both files of the pair. The combiner and reducer accumulate
    ``(count, sum)`` pairs per file, and the reducer keeps the CAPACITY
    largest (negated) averages in a min-heap.
    """

    def mapper_init(self):
        """Load the variable score table from ``var_map.json``."""
        #Opens .json file for variables.
        with open("var_map.json") as var_file:
            self.varible_score = json.load(var_file)

    def mapper(self, _, line):
        """Emit ``(filename, (1, score))`` for both files of each compared pair.

        Args:
            _: Ignored input key.
            line: Text of the form ``file1,file2,total``.

        Yields:
            ``(filename, (1, score))`` where ``score`` is the sum of the
            negated Jaro-Winkler text similarity and the negated
            variable/parameter scores returned by ``util``.
        """
        # Strip so that stray whitespace does not end up in file names.
        file1, file2, total = (field.strip() for field in line.split(","))
        upper = int(total)
        # A set collapses the file1 == file2 case into a single pass.
        for file_number in {file1, file2}:
            filename = "pyfile" + file_number + ".txt"
            # Start just above the file being processed (not always file1),
            # so a file is never compared with itself and each pair is
            # produced only once.
            for i in range(int(file_number) + 1, upper):
                #Get score for body text.
                comparison_file = "pyfile" + str(i) + ".txt"
                # NOTE(review): util.funcsim is assumed to return an indexable
                # where [2]/[3] are the parameters and [4]/[5] the body text
                # of the two files -- confirm against util.py.
                sims = util.funcsim(filename, comparison_file)
                parameters1, parameters2 = sims[2], sims[3]
                text1, text2 = sims[4], sims[5]
                ts = -jellyfish.jaro_winkler(text1, text2)
                sides = (
                    (filename, text1, parameters1),
                    (comparison_file, text2, parameters2),
                )
                for name, text, parameters in sides:
                    #Gets scores for variables/parameters and computes total.
                    vs = -util.get_variable_score(self.varible_score, text)
                    ps = -util.get_variable_score(self.varible_score,
                                                  parameters)
                    # Emit (count, sum) so mapper and combiner output share
                    # one shape; the job then works whether the combiner runs
                    # zero, one, or several times.
                    yield name, (1, ts + vs + ps)

    def combiner(self, file, scores):
        """Merge partial ``(count, sum)`` pairs for one file.

        Args:
            file: File name key.
            scores: Iterable of ``(count, sum)`` pairs.

        Yields:
            ``(file, (count, sum))`` with both components added up.
        """
        #Accumulates totals within node.
        num = 0
        total = 0
        for partial_num, partial_total in scores:
            num += partial_num
            total += partial_total
        yield file, (num, total)

    def reducer_init(self):
        """Create the empty min-heap that holds the best averages."""
        # No placeholder entry: a sentinel would be emitted as a bogus
        # result whenever fewer than CAPACITY keys reach this reducer.
        self.h = []

    def reducer(self, word, counts):
        """Compute the mean score for one file and offer it to the heap.

        Args:
            word: File name key.
            counts: Iterable of ``(count, sum)`` pairs.
        """
        #Compute totals and means.
        total_count = 0
        total_num = 0
        for num, subtotal in counts:
            total_num += num
            total_count += subtotal
        if not total_num:
            # Nothing to average; avoid dividing by zero.
            return
        avg = total_count / total_num
        if len(self.h) < CAPACITY:
            heapq.heappush(self.h, (avg, word))
        else:
            # Heap is full: push the new entry and drop the smallest one.
            heapq.heappushpop(self.h, (avg, word))

    def reducer_final(self):
        """Drain the heap, yielding ``(file, score)`` with the sign restored."""
        while self.h:
            avg, word = heapq.heappop(self.h)
            #Yield inverse again so we have smallest values.
            yield word, -avg
# Run the job only when this file is executed directly, not when imported.
if __name__ == "__main__":
    TopFuncText.run()
\ No newline at end of file
Python/toptext.py
View file @
79da2cf5
...
@@ -5,16 +5,18 @@ import jellyfish
...
@@ -5,16 +5,18 @@ import jellyfish
import
json
import
json
#Sek k of top-k entries to collect.
#Sek k of top-k entries to collect.
CAPACITY
=
10
0
CAPACITY
=
10
class
TopText
(
MRJob
):
class
TopText
(
MRJob
):
def
mapper_init
(
self
):
def
mapper_init
(
self
):
#Opens .json files that have scores for functions/variables.
#Opens .json files that have scores for functions/variables.
with
open
(
"var_map.json"
)
as
var
:
with
open
(
"var_map.json"
)
as
var
:
self
.
varible_score
=
json
.
reads
(
var
)
var
=
var
.
read
()
self
.
varible_score
=
json
.
loads
(
var
)
with
open
(
"func_map.json"
)
as
func
:
with
open
(
"func_map.json"
)
as
func
:
self
.
function_score
=
json
.
reads
(
func
)
var
=
var
.
read
()
self
.
function_score
=
json
.
loads
(
func
)
def
mapper
(
self
,
_
,
line
):
def
mapper
(
self
,
_
,
line
):
file1
,
file2
,
total
=
line
.
split
(
","
)
file1
,
file2
,
total
=
line
.
split
(
","
)
...
@@ -23,11 +25,8 @@ class TopText(MRJob):
...
@@ -23,11 +25,8 @@ class TopText(MRJob):
filename
=
"pyfile"
+
file_number
+
".txt"
filename
=
"pyfile"
+
file_number
+
".txt"
file_text
=
filename
.
read
()
file_text
=
filename
.
read
()
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
for
i
in
range
(
int
(
file1
)
+
1
,
int
(
total
)):
++--
#Get score for body text.
Python
/
pyfuncsplit
.
py
|
24
+++++-
comparison_file
=
"pyfile"
+
str
(
i
)
+
".txt"
Python
/
topfunctions
.
py
|
22
++++--
Python
/
toptext
.
py
|
30
++++--
#Get score for body text.
comparison_file
=
"pyfile"
+
i
+
".txt"
comparison_text
=
comparison_file
.
read
()
comparison_text
=
comparison_file
.
read
()
ts
=
-
jellyfish
.
jaro_winkler
(
filename_text
,
comparison_text
)
ts
=
-
jellyfish
.
jaro_winkler
(
filename_text
,
comparison_text
)
for
text
in
[(
filename
,
file_text
),
(
comparison_file
,
comparison_text
)]:
for
text
in
[(
filename
,
file_text
),
(
comparison_file
,
comparison_text
)]:
...
@@ -69,4 +68,4 @@ class TopText(MRJob):
...
@@ -69,4 +68,4 @@ class TopText(MRJob):
yield
item
[
1
],
-
item
[
0
]
yield
item
[
1
],
-
item
[
0
]
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
MRWordFreqCount
.
run
()
TopText
.
run
()
\ No newline at end of file
\ No newline at end of file
Results/func
scores
.json
→
Results/func
_map
.json
View file @
79da2cf5
File moved
Results/var_map.json
0 → 100644
View file @
79da2cf5
This diff is collapsed.
Click to expand it.
variable_scores.txt
→
Results/
variable_scores.txt
View file @
79da2cf5
File moved
util.py
View file @
79da2cf5
...
@@ -43,7 +43,11 @@ def get_variable_score(d, text):
...
@@ -43,7 +43,11 @@ def get_variable_score(d, text):
total
=
0
total
=
0
for
variable
in
variables
:
for
variable
in
variables
:
total
+=
d
.
get
(
variable
,
DEFAULT_RETURN
)
total
+=
d
.
get
(
variable
,
DEFAULT_RETURN
)
return
total
/
num
if
total
:
score
=
total
/
num
else
:
score
=
0
return
score
def
get_function_score
(
d
,
text
):
def
get_function_score
(
d
,
text
):
'''
'''
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment