Commit 21c370a4 by Garrett Johnson

Merge branch 'master' of mit.cs.uchicago.edu:ajfriedman/election_district_drawing

parents 040e5d71 d1ec7630
algorithms directory:
algorithms.py contains all the functions for constructing district plans by utilizing
the efficiency gap
reading_shapes directory:
get_combined.py contains functions to convert shapefiles of census tracts into Python objects, determine which tracts
are connected to each other, and analyze demographic data to estimate the number of Republicans and Democrats in each tract
location_api.py uses the GPS coordinates of census tracts to determine which state they are in
and sorts them accordingly
state_set_json_files contains the partially completed tracts as dictionaries created by
location_api.py
state_tracts_full_json_files contains the completed tract dictionaries created by get_combined.py
shp_test1.py
state_num.csv
States.csv
states.json
tract.py
util.py
import queue
import csv
from tract import tract_dict
# Load the lookup table stored in States.csv. The file is opened in a ``with``
# block so the handle is closed as soon as the rows have been consumed
# (previously it stayed open for the life of the process).
with open("States.csv", newline="") as _states_file:
    state = csv.reader(_states_file)
    # dict() requires every CSV row to have exactly two columns: the first
    # becomes the key, the second the value. Values stay as strings.
    state_num_dict = dict(state)

# Default identifier for the tract a plan starts from.
# NOTE(review): "geotag" looks like a stand-in, not a real tract id -- confirm.
starting_tract = "geotag"
def build_one_district(starting_tract, tract_dict, district_num,
                       tracts_added, target_population=250000):
    """
    Grow one district breadth-first from starting_tract.

    Tracts are taken from a FIFO queue, added to the district, and their
    not-yet-assigned neighbours are queued, until the district holds at
    least target_population people or no reachable unassigned tract remains.

    starting_tract: key into tract_dict for the first tract to add
    tract_dict: maps tract id -> dict with the keys "connections"
        (iterable of neighbouring tract ids), "total_population",
        "dems" and "reps"
    district_num: label recorded for every tract placed in this district
    tracts_added: dict of tract id -> district label; updated in place and
        used to skip tracts that already belong to a district
    target_population: population at which the district stops growing

    Returns a dict with the keys "R", "D", "total_population" and "district".
    Raises KeyError if a queued tract id is missing from tract_dict.
    """
    q_districts = queue.Queue()
    population = 0
    dems = 0
    reps = 0
    q_districts.put(starting_tract)
    # Also stop when the frontier is empty: Queue.get() would otherwise block
    # forever once every reachable tract has been consumed.
    while population < target_population and not q_districts.empty():
        gt = q_districts.get()
        # A tract can be queued by several neighbours before it is processed;
        # skip it here so it is only counted once.
        if gt in tracts_added:
            continue
        tract = tract_dict[gt]
        population += tract["total_population"]
        dems += tract["dems"]
        reps += tract["reps"]
        tracts_added[gt] = district_num
        for connect in tract["connections"]:
            if connect not in tracts_added:
                # Queue the neighbour itself (this used to reference an
                # undefined name and raised NameError).
                q_districts.put(connect)
    one_district = {"R": reps, "D": dems, "total_population": population,
                    "district": district_num}
    return one_district
def build_all_districts(starting_tract, tract_dict, state_name):
    """
    Build districts for one state, one after another.

    starting_tract: tract id the first district grows from
    tract_dict: tract data passed through to build_one_district
    state_name: key into the module-level state_num_dict

    Returns the list of district summaries produced by build_one_district,
    in the order they were built. Raises KeyError if state_name is not in
    state_num_dict and ValueError if its entry is not an integer string.
    """
    list_districts = []
    tracts_added = {}
    # state_num_dict is filled from csv.reader, so its values are strings and
    # must be converted before being used as a range bound.
    # NOTE(review): assumes the value stored per state is its district count
    # -- confirm against States.csv.
    num_districts = int(state_num_dict[state_name])
    for district_num in range(1, num_districts + 1):
        # district_num must be passed explicitly; build_one_district takes
        # it as its third positional argument.
        one_district = build_one_district(starting_tract, tract_dict,
                                          district_num, tracts_added)
        list_districts.append(one_district)
        starting_tract = find_new_starting_tract()
        if starting_tract is None:
            # No further seed tract is available, so stop rather than look
            # up tract_dict[None] on the next pass.
            break
    return list_districts
def find_new_starting_tract():
    """
    Placeholder for picking the tract the next district starts from.

    Not implemented yet: takes no arguments and always returns None.
    """
    return None
\ No newline at end of file
......@@ -3,47 +3,25 @@ import json
import ast
from util import create_json_file, recover_json
# Module-level shapefile location. The commented-out line selects the file
# under test_files/ instead of the census-tract shapefile.
# shapefile_path = "test_files/s_11au16.shp"
shapefile_path = "Census_tract/Tract_2010Census_DP1.shp"
def get_shapeRecs(shapefile_path):
    '''
    Read a shapefile and return its shape records as Python objects

    shapefile_path: path handed to shapefile.Reader
    Returns the result of Reader.shapeRecords() for that file
    '''
    reader = shapefile.Reader(shapefile_path)
    return reader.shapeRecords()
# def get_coordinates_connects(shapeRecs):
# sf_connect = set()
# sf_dict = {}
# sf_state = {}
# # add state loop in
# # set_states
# for num in range(74002):
# # rec = int(shapeRecs[num].record[2])
# # print(shapeRecs[num].record)
# rec = shapeRecs[num].record[0]
# points = shapeRecs[num].shape.points
# # if rec in sf_state:
# #
# for point in points:
# if point in sf_dict:
# list_num = sf_dict[point]
# for past_num in list_num:
# if not past_num in sf_dict and not (past_num == rec):
# # if not past_num in sf_dict and (past_num-rec):
# sf_connect.add((rec,past_num))
# sf_connect.add((past_num,rec))
# sf_dict[point] += [rec]
# elif point not in sf_dict:
# sf_dict[point] = [rec]
# return sf_connect
def get_states_connects(shapeRecs, state_set):
'''
Find all tract connections in a given state
'''
sf_connects = set()
dict_points = {}
for num in range(74002):
# rec = int(shapeRecs[num].record[2])
# print(shapeRecs[num].record)
rec = shapeRecs[num].record[0]
if rec in state_set:
points = shapeRecs[num].shape.points
......@@ -60,32 +38,37 @@ def get_states_connects(shapeRecs, state_set):
return sf_connects
def get_connects_tracts(sf_connect):
    '''
    Group connected-tract pairs into an adjacency dictionary

    sf_connect: iterable of (tract, neighbour) pairs, e.g. the set returned
    by get_states_connects
    Returns a dict mapping each first element to the list of distinct second
    elements paired with it, in the order they are first seen
    '''
    dict_tracts = {}
    for sh_num1, sh_num2 in sf_connect:
        # setdefault creates the neighbour list the first time a tract is seen
        neighbours = dict_tracts.setdefault(sh_num1, [])
        if sh_num2 not in neighbours:
            neighbours.append(sh_num2)
    return dict_tracts


# Keep the pre-rename name importable so existing callers still work
get_connects_traits = get_connects_tracts
# def get_connects(shapeRecs):
# dict_connect = get_coordinates_connects(shapeRecs)
# dict_tracts = get_connects_traits(dict_tracts)
# return dict_tracts
def get_full_connects_state(shapeRecs, state_set):
    '''
    Find all tract connections within a given state

    shapeRecs: shape records passed through to get_states_connects
    state_set: collection of tract ids belonging to the state
    Returns the adjacency dictionary built by get_connects_tracts from the
    connections that get_states_connects finds
    '''
    connected_pairs = get_states_connects(shapeRecs, state_set)
    # Use the current helper name; the old get_connects_traits spelling was
    # renamed to get_connects_tracts.
    dict_tracts = get_connects_tracts(connected_pairs)
    return dict_tracts
def age_DR(record):
'''
Use age information in a tract to weight the number of Republicans and Democrats
NOTE: We determined how to weight the predictive power of demographics using CNN's
2012 exit poll data, found here: http://www.cnn.com/election/2012/results/race/president/
'''
total_population = record[6]
weight_R = 0
weight_D= 0
......@@ -124,6 +107,11 @@ def age_DR(record):
def race_RD(record):
'''
Use race information in a tract to weight the number of Republicans and Democrats
NOTE: We determined how to weight the predictive power of demographics using CNN's
2012 exit poll data, found here: http://www.cnn.com/election/2012/results/race/president/
'''
total_population = record[6]
weight_R = 0
weight_D = 0
......@@ -154,8 +142,12 @@ def race_RD(record):
return percent_RD
def gender_RD(record):
'''
Use gender information in a tract to weight the number of Republicans and Democrats
NOTE: We determined how to weight the predictive power of demographics using CNN's
2012 exit poll data, found here: http://www.cnn.com/election/2012/results/race/president/
'''
total_population = record[6]
weight_R = 0
weight_D = 0
......@@ -183,6 +175,10 @@ def gender_RD(record):
def get_populations_RD(shapeRecs, connect_tracts, state_set):
'''
Apply weighting for age, race, and gender to estimate the number of Republicans and Democrats
in a tract and return the tract information as a dictionary
'''
tracts_dict = {}
num_tracts_in_state = len(state_set)
for num in range(74002):
......@@ -224,40 +220,29 @@ def get_populations_RD(shapeRecs, connect_tracts, state_set):
# Needs fixing:
# Set instead of dictionary
# Sizes
# def create_tract(shapeRecs):
# connect_tracts = get_connects(shapeRecs)
# print("done with get connects")
# dict_tracts = get_populations_RD(shapeRecs,connect_tracts)
# print("done with get tracts")
# create_json_file(dict_tracts, "tracts" + "_run_1")
# print("json created")
# return dict_tracts
def create_state_tracts(shapeRecs):
    '''
    Build the full tract dictionaries for every state and write them to disk

    Reads the per-state tract sets from
    state_set_json_files/Master_tract_sets.json, computes each state's tract
    connections and population estimates, and writes one json file per state
    plus a master file (after state_tracts_hard_coded has been applied) into
    state_tracts_full_json_files
    Returns the master dictionary produced by state_tracts_hard_coded
    '''
    tract_sets_by_state = recover_json("state_set_json_files/" + "Master_tract_sets.json")
    state_tracts = {}
    for state, state_set in tract_sets_by_state.items():
        # Progress output: state name and how many tracts it has
        print(state, len(state_set))
        connect_tracts = get_full_connects_state(shapeRecs, state_set)
        print("done with get connects")
        dict_tracts = get_populations_RD(shapeRecs, connect_tracts, state_set)
        print("done with get tracts")
        state_tracts[state] = dict_tracts
        per_state_path = "state_tracts_full_json_files/" + state + "tract_dict"
        create_json_file(dict_tracts, per_state_path)
        print("json created")
    # Patch in the hard-coded connections before writing the master file
    Master_state_tracts = state_tracts_hard_coded(state_tracts)
    master_path = "state_tracts_full_json_files/" + "Master_tract_dicts"
    create_json_file(Master_state_tracts, master_path)
    print("Master and state json hardcoded")
    return Master_state_tracts
def state_tracts_hard_coded(Master_tract_dicts):
'''
Hard code connections for islands in certain states to ensure that there
are not unconnected tracts
'''
hc_islands = { "California": {"06075980401": "06041132200",
"06037599100": "06037990300" },
"Florida": {"12087980100": "12087990000" },
......
import shapefile
# shapefile_path = "test_files/s_11au16.dbf"
# Active input: the .shp file under test_files/. The commented-out
# alternatives point at the matching .dbf and at the census-tract shapefile.
shapefile_path = "test_files/s_11au16.shp"
# shapefile_path = "Census_tract/Tract_2010Census_DP1.shp"
# NOTE: the file is opened and all shape records are read when this module is
# imported, not when one of the functions below is called.
sf = shapefile.Reader(shapefile_path)
shapeRecs = sf.shapeRecords()
def get_coordinates_connects(shapeRecs, num_records=52):
    '''
    Find pairs of shapes that share at least one boundary point

    shapeRecs: indexable sequence of shape records; each item needs a
    .record whose first field identifies the shape and a .shape.points
    sequence of coordinates
    num_records: how many leading records to examine (default 52, the
    value that used to be hard-coded); capped at len(shapeRecs) so a
    shorter input no longer raises IndexError

    Returns a dict whose keys are (id, other_id) tuples, stored in both
    directions; every value is the placeholder string ' '
    '''
    sf_connect = {}
    sf_dict = {}
    for num in range(min(num_records, len(shapeRecs))):
        # Progress output, one line per record processed
        print(num)
        rec = shapeRecs[num].record[0]
        for point in shapeRecs[num].shape.points:
            # Normalise to a tuple so list-valued points can be dict keys
            key = tuple(point)
            owners = sf_dict.setdefault(key, [])
            for past_num in owners:
                # The earlier extra test "past_num not in sf_dict" compared a
                # record id against point keys and never excluded anything,
                # so only the self-pair check is kept.
                if past_num != rec:
                    sf_connect[(rec, past_num)] = ' '
                    sf_connect[(past_num, rec)] = ' '
            # Append only after the scan so the list is not grown while it
            # is being iterated
            owners.append(rec)
    return sf_connect
def get_connects_traits(sf_connect):
    '''
    Turn connection pairs into an adjacency dictionary

    sf_connect: iterable of (id, other_id) pairs, such as the keys of the
    dict returned by get_coordinates_connects
    Returns a dict mapping each first id to the list of distinct second ids
    paired with it, in the order they are first seen
    '''
    sf_traits = {}
    for sh_num1, sh_num2 in sf_connect:
        # One lookup replaces the separate "not in" / "in" branches; the
        # unused counter has been removed.
        linked = sf_traits.setdefault(sh_num1, [])
        if sh_num2 not in linked:
            linked.append(sh_num2)
    return sf_traits
def get_connects(shapeRecs):
    '''
    Compute the adjacency dictionary for a set of shape records

    Feeds the pairs from get_coordinates_connects straight into
    get_connects_traits and returns that result
    '''
    pair_lookup = get_coordinates_connects(shapeRecs)
    return get_connects_traits(pair_lookup)
# import shapefile
# shapefile_path = "test_files/s_11au16.dbf"
# # shapefile_path = "test_files/s_11au16.shp"
# sf = shapefile.Reader(shapefile_path)
# shapes = sf.shapes()
# sf_connect = {}
# sf_dict = {}
# for num in range(57):
# points = shapes[num].points
# for point in points:
# if point in sf_dict:
# past_num = sf_dict[point]
# if not past_num in sf_dict and (num - past_num):
# sf_connect[(num,past_num)] = ' '
# sf_connect[(past_num,num)] = ' '
# elif point not in sf_dict:
# sf_dict[point] = num
# count = 0
# sf_children = {}
# for sh_num1, sh_num2 in sf_connect:
# if (sh_num1 - sh_num2):
# if sh_num1 not in sf_children:
# sf_children[sh_num1] = [sh_num2]
# count += 1
# if sh_num1 in sf_children:
# if sh_num2 not in sf_children[sh_num1]:
# sf_children[sh_num1].append(sh_num2)
# print(sf_children)
# # A few problems:
# # Multiple items on each
# # Set instead of a dictionary
# # If no connections add to list
# import shapefile
# shapefile_path = "test_files/s_11au16.dbf"
# # shapefile_path = "test_files/s_11au16.shp"
# sf = shapefile.Reader(shapefile_path)
# shapes = sf.shapes()
# sf_connect = {}
# sf_dict = {}
# for num in range(57):
# points = shapes[num].points
# for point in points:
# if point in sf_dict:
# list_num = sf_dict[point]
# for past_num in list_num:
# if not past_num in sf_dict and (num - past_num):
# sf_connect[(num,past_num)] = ' '
# sf_connect[(past_num,num)] = ' '
# sf_dict[point] += [num]
# elif point not in sf_dict:
# sf_dict[point] = [num]
# count = 0
# sf_children = {}
# for sh_num1, sh_num2 in sf_connect:
# if (sh_num1 - sh_num2):
# if sh_num1 not in sf_children:
# sf_children[sh_num1] = [sh_num2]
# count += 1
# if sh_num1 in sf_children:
# if sh_num2 not in sf_children[sh_num1]:
# sf_children[sh_num1].append(sh_num2)
# print(sf_children)
# import shapefile
# shapefile_path = "test_files/s_11au16.dbf"
# # shapefile_path = "test_files/s_11au16.shp"
# sf = shapefile.Reader(shapefile_path)
# shapeRecs = sf.shapeRecords()
# sf_connect = {}
# sf_dict = {}
# for num in range(57):
# # rec = int(shapeRecs[num].record[2])
# # print(shapeRecs[num].record)
# rec = shapeRecs[num].record[0]
# points = shapeRecs[num].shape.points
# for point in points:
# if point in sf_dict:
# list_num = sf_dict[point]
# for past_num in list_num:
# if not past_num in sf_dict and not (past_num == rec):
# # if not past_num in sf_dict and (past_num-rec):
# sf_connect[(rec,past_num)] = ' '
# sf_connect[(past_num,rec)] = ' '
# sf_dict[point] += [rec]
# elif point not in sf_dict:
# sf_dict[point] = [rec]
# count = 0
# sf_children = {}
# for sh_num1, sh_num2 in sf_connect:
# if sh_num1 not in sf_children:
# sf_children[sh_num1] = [sh_num2]
# count += 1
# if sh_num1 in sf_children:
# if sh_num2 not in sf_children[sh_num1]:
# sf_children[sh_num1].append(sh_num2)
# print(sf_children)
# Needs fixing:
# Set instead of dictionary
# Sizes
# Putting items that do not connect
\ No newline at end of file
import shapefile
# Input shapefile; the commented-out line below switches to the file under
# test_files/ instead.
shapefile_path = "Census_tract/Tract_2010Census_DP1.shp"
# shapefile_path = "test_files/s_11au16.shp"
# NOTE: the reader is created when this module is imported.
sf = shapefile.Reader(shapefile_path)
def age_DR(record):
    '''
    Score a tract's partisan lean from its age-group counts

    record: indexable tract record; field 6 is read as the total population
    and fields 10-24 as the age-group counts
    Returns 0.0 when no age-group population is counted or the weights
    cancel out; otherwise net weight divided by total weight magnitude
    (test_record treats positive as Republican, negative as Democratic)
    '''
    total_population = record[6]

    # Age-group sizes, named after the ranges they are taken to cover
    pop_18_24 = record[10]*.75 + record[11]
    pop_25_29 = record[12]
    pop_30_39 = sum(record[13:15])
    pop_40_49 = sum(record[15:17])
    pop_50_64 = sum(record[17:20])
    pop_over_65 = sum(record[20:25])

    # Younger groups accumulate (negative) Democratic weight
    weight_D = 0
    for cohort, lean in ((pop_18_24, -0.25), (pop_25_29, -0.22), (pop_30_39, -0.13)):
        weight_D += cohort * lean

    # Older groups accumulate (positive) Republican weight
    weight_R = 0
    for cohort, lean in ((pop_40_49, 0.02), (pop_50_64, 0.05), (pop_over_65, 0.12)):
        weight_R += cohort * lean

    counted = pop_18_24 + pop_25_29 + pop_30_39 + pop_40_49 + pop_50_64 + pop_over_65
    if not counted:
        return 0.0

    # Scale both weights by how much of the tract the counted groups cover
    scale = 1 - (counted/total_population) + 1
    weight_D = weight_D * scale
    weight_R = weight_R * scale

    net = weight_R + weight_D
    if not net:
        return 0.0
    return ((net)/(abs(weight_D)+weight_R))
def race_RD(record):
    '''
    Score a tract's partisan lean from its race counts

    record: indexable tract record; field 6 is read as the total population
    and fields 83, 84, 112 and 86 as the four group counts
    Returns 0.0 when none of the four groups is populated or the weights
    cancel out; otherwise net weight divided by total weight magnitude
    (test_record treats positive as Republican, negative as Democratic)
    '''
    total_population = record[6]
    pop_white = record[83]
    pop_african_american = record[84]
    pop_latino = record[112]
    pop_asian = record[86]

    # Single positive (Republican) contribution
    weight_R = 0
    weight_R += pop_white * 0.20

    # Negative (Democratic) contributions, accumulated in the original order
    weight_D = 0
    for group, lean in ((pop_african_american, -0.88), (pop_latino, -0.45), (pop_asian, -0.21)):
        weight_D += group * lean

    counted = pop_white + pop_african_american + pop_latino + pop_asian
    if not counted:
        return 0.0

    # Scale both weights by how much of the tract the counted groups cover
    scale = 1 - (counted/total_population) + 1
    weight_D = weight_D * scale
    weight_R = weight_R * scale

    net = weight_R + weight_D
    if not net:
        return 0.0
    return ((net)/(abs(weight_D)+weight_R))
def gender_RD(record):
    '''
    Score a tract's partisan lean from its male and female counts

    record: indexable tract record; field 6 is read as the total population,
    field 67 as the male count and field 68 as the female count
    Returns 0.0 when both counts are zero or the weights cancel out;
    otherwise net weight divided by total weight magnitude
    (test_record treats positive as Republican, negative as Democratic)
    '''
    total_population = record[6]
    pop_male, pop_female = record[67], record[68]

    # Male count adds Republican weight, female count adds Democratic weight
    weight_R = 0 + pop_male * 0.07
    weight_D = 0 + pop_female * -0.12

    counted = pop_female + pop_male
    if not counted:
        return 0.0

    # Scale both weights by how much of the tract the two counts cover
    scale = 1 - (counted/total_population) + 1
    weight_D = weight_D * scale
    weight_R = weight_R * scale

    net = weight_R + weight_D
    if not net:
        return 0.0
    return ((net)/(abs(weight_D)+weight_R))
def test_record(sf):
    '''
    Build a per-tract dictionary of estimated party counts and coordinates

    sf: object whose records() method returns the tract records
    Returns a dict keyed by each record's first field; every value holds
    "total_population", "D", "R", "lat" and "lon"
    '''
    all_records = sf.records()
    print(5)
    tracts_dict = {}
    for record in all_records:
        total_population = record[6]
        if not total_population:
            # Empty tract: nothing to estimate
            entry = {"total_population": 0, "D": 0, "R": 0}
        else:
            gender = gender_RD(record)
            race = race_RD(record)
            age = age_DR(record)
            # Average the three scores, then halve so the shift from an even
            # split stays within +/- 0.5
            RD_final = ((gender+race+age)/3)/2
            if RD_final > 0:
                reps = (.5 + RD_final)* total_population
                dems = (.5 - RD_final) * total_population
            elif RD_final <= 0:
                dems = (.5 + abs(RD_final))*total_population
                reps = (.5 - abs(RD_final)) *total_population
            entry = {"total_population": total_population, "D": dems, "R": reps}
        entry["lat"] = record[4]
        entry["lon"] = record[5]
        tracts_dict[record[0]] = entry
    return tracts_dict
# num_of_shape1 = 100000
# def test_shape(sf)
# for shape_index_num in range(len):
# s = sf.shape(shape_index_num)
# count += 1
# # w = shapefile.Writer(s)
# # print(w)
# # w.field('FIRST_FLD','C','40')
# print(count)
# # w.record('First','Point')
......@@ -6,13 +6,14 @@ from util import create_json_file, recover_json
# Key for the LocationIQ service; presumably the value passed as the API_KEY
# argument of tracts_state() and check_balance().
# NOTE(review): this credential is committed in plain text -- consider loading
# it from the environment and rotating the key.
API_KEY = "6865b163d2db4c501f40"
def tracts_state(API_KEY):
'''
Use the GPS coordinates of tracts to determine which state they are in
To translate GPS coordinates into states, we used the LocationIQ API, found here: http://locationiq.org/
'''
dict_tracts = recover_json("tracts1.json")
dict_states = recover_json("statesrun_5.json")
print("statesrun_5.json")
balance = check_balance(API_KEY)
print(balance)
empty = []
# dict_states = {}
count = 0
for tract in dict_tracts:
if tract not in dict_states:
......@@ -23,7 +24,6 @@ def tracts_state(API_KEY):
if _lat[0] == "+":
_lat = _lat[1:]
request_URL = "http://locationiq.org/v1/reverse.php?format=json&key=" + API_KEY + "&lat=" + _lat + "&lon=" + _lon + "&zoom=16"
print(request_URL, count)
location_info = requests.get(request_URL)
location_info_text = location_info.text
location_info_dict = ast.literal_eval(location_info_text)
......@@ -47,8 +47,10 @@ def tracts_state(API_KEY):
def check_balance(API_KEY):
'''
Check how many more times we could use the LocationIQ API in a day
'''
request_URL = "http://locationiq.org/v1/balance.php?key=" + API_KEY
print(request_URL)
balance_request = requests.get(request_URL)
balance_text = balance_request.text
balance_dict = ast.literal_eval(balance_text)
......@@ -57,6 +59,9 @@ def check_balance(API_KEY):
return final_balance
def hard_code(output=False):
'''
Hard code island tracts that were not properly attributed to their respective states
'''
dict_states = recover_json("statesrun_6.json")
hc_dict = {
"06083001800": "California",
......@@ -79,6 +84,11 @@ def hard_code(output=False):
return dict_states
def create_state_sets(output=False):
'''
Create a json file with dictionaries for each tract as a Master file
Create additional json files sorted by state
Store files in state_set_json_files
'''
dict_states_w_tracts = {}
dict_states = recover_json("states_hc.json")
for tract, state in dict_states.items():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment