Merge branch 'master' of mit.cs.uchicago.edu:ajfriedman/election_district_drawing

21c370a4 · Garrett Johnson · 040e5d71 · d1ec7630 · 21c370a4 · 21c370a4
Commit 21c370a4 authored Mar 14, 2017 by Garrett Johnson
Showing with 88 additions and 451 deletions
README.txt
reading_shapes/__pycache__/util.cpython-34.pyc
reading_shapes/building_districts.py
reading_shapes/get_combined.py
reading_shapes/get_connections.py
reading_shapes/get_population.py
reading_shapes/location_api.py
--- a/README.txt
+++ b/README.txt
+algorithms directory:
+    algorithms.py contains all the functions for constructing district plans by utilizing
+        the efficiency gap
+
+reading_shapes directory:
+    get_combined.py contains functions to read census-tract shapefiles into Python objects, determine which tracts
+        are connected to each other, and analyze demographic data to estimate the number of Republicans and Democrats in each tract
+    location_api.py uses the GPS coordinates of census tracts to determine which state they are in 
+        and sorts them accordingly 
+    state_set_json_files contains the partially completed tracts as dictionaries created by
+        location_api.py    
+    state_tracts_full_json_files contains the completed tract dictionaries created by get_combined.py
+
+    shp_test1.py
+    state_num.csv
+    States.csv
+    states.json
+    tract.py
+    util.py
+
+    
+    
+
+
--- a/reading_shapes/__pycache__/util.cpython-34.pyc
+++ b/reading_shapes/__pycache__/util.cpython-34.pyc
--- a/reading_shapes/building_districts.py
+++ b/reading_shapes/building_districts.py
-import queue
-import csv
-from tract import tract_dict
-
-state = csv.reader(open("States.csv"))
-state_num_dict = dict(state)
-
-starting_tract = "geotag"
-
-def build_one_district(starting_tract, tract_dict, district_num,
-    tracts_added):
-    q_districts = queue.Queue()
-    population = 0
-    dems = 0
-    reps = 0
-
-    q_districts.put(starting_tract)
-    while population < 250000:
-        gt = q_districts.get()
-        gt_connections = tract_dict[gt]["connections"]
-        population += tract_dict[gt]["total_population"]
-        dems += tract_dict[gt]["dems"]
-        reps += tract_dict[gt]["reps"]
-        tracts_added[gt] = district_num
-        for connect in gt_connections:
-            if connect not in tracts_added:
-                q_districts.put(geotag)
-    one_district = {"R": reps, "D": dems, "total_population": population,
-    "district": district_num}
-    return one_district
-
-
-
-def build_all_districts(starting_tract, tract_dict, state_name):
-    list_districts = []
-    tracts_added = {}
-    for district_num in range(1,(state_num[state_name]+1)):
-        one_district = build_one_district(starting_tract,tract_dict,
-            tracts_added)
-        list_districts.append(one_district)
-        starting_tract = find_new_starting_tract()
-    return list_districts
-
-
-def find_new_starting_tract():
-    return 
\ No newline at end of file
--- a/reading_shapes/get_combined.py
+++ b/reading_shapes/get_combined.py
@@ -3,47 +3,25 @@ import json
 import ast
 from util import create_json_file, recover_json

-# shapefile_path = "test_files/s_11au16.shp"
+

 shapefile_path = "Census_tract/Tract_2010Census_DP1.shp"

 def get_shapeRecs(shapefile_path):
+    '''
+    Open the shapefile at shapefile_path and return its shape records (the result of sf.shapeRecords())
+    '''
    sf = shapefile.Reader(shapefile_path)
    shapeRecs = sf.shapeRecords()
    return shapeRecs

-# def get_coordinates_connects(shapeRecs):
-#     sf_connect = set()
-#     sf_dict = {}
-#     sf_state = {}
-#     # add state loop in 
-#     # set_states
-#     for num in range(74002):
-#         # rec = int(shapeRecs[num].record[2])
-#         # print(shapeRecs[num].record)
-#         rec = shapeRecs[num].record[0]
-#         points = shapeRecs[num].shape.points
-#         # if rec in sf_state:
-#         #
-#         for point in points:
-#             if point in sf_dict:
-#                 list_num = sf_dict[point]
-#                 for past_num in list_num:
-#                     if not past_num in sf_dict and not (past_num == rec):
-#                     # if not past_num in sf_dict and (past_num-rec):
-#                         sf_connect.add((rec,past_num))
-#                         sf_connect.add((past_num,rec))
-#                 sf_dict[point] += [rec]
-#             elif point not in sf_dict:
-#                 sf_dict[point] = [rec]
-#     return sf_connect
-
 def get_states_connects(shapeRecs, state_set):
+    '''
+    Find all tract connections in a given state
+    '''
    sf_connects = set()
    dict_points = {}
    for num in range(74002):
-        # rec = int(shapeRecs[num].record[2])
-        # print(shapeRecs[num].record)
        rec = shapeRecs[num].record[0]
        if rec in state_set:
            points = shapeRecs[num].shape.points
@@ -60,32 +38,37 @@ def get_states_connects(shapeRecs, state_set):
    return sf_connects


-
-def get_connects_traits(sf_connect):
+def get_connects_tracts(sf_connect):
+    '''
+    Map each tract to the list of tracts it is connected to, given (tract, connected tract) pairs
+    '''
    count = 0
-    dict_traits = {}
+    dict_tracts = {}
    for sh_num1, sh_num2 in sf_connect:
-        if sh_num1 not in dict_traits:
-            dict_traits[sh_num1] = [sh_num2]
+        if sh_num1 not in dict_tracts:
+            dict_tracts[sh_num1] = [sh_num2]
            count += 1
-        if sh_num1 in dict_traits:
-            if sh_num2 not in dict_traits[sh_num1]:
-                dict_traits[sh_num1].append(sh_num2)
-    return dict_traits
-
+        if sh_num1 in dict_tracts:
+            if sh_num2 not in dict_tracts[sh_num1]:
+                dict_tracts[sh_num1].append(sh_num2)
+    return dict_tracts

-# def get_connects(shapeRecs):
-#     dict_connect = get_coordinates_connects(shapeRecs)
-#     dict_tracts = get_connects_traits(dict_tracts)
-#     return dict_tracts

 def get_full_connects_state(shapeRecs, state_set):
+    '''
+    Build the tract-to-connected-tracts dictionary for the tracts in state_set
+    '''
    dict_connect = get_states_connects(shapeRecs, state_set)
-    dict_tracts = get_connects_traits(dict_connect)
+    dict_tracts = get_connects_tracts(dict_connect)
    return dict_tracts


 def age_DR(record):
+    '''
+    Use age information in a tract to weight the number of Republicans and Democrats
+    NOTE: We determined how to weight the predictive power of demographics using CNN's
+    2012 exit poll data, found here: http://www.cnn.com/election/2012/results/race/president/
+    '''
    total_population = record[6]
    weight_R = 0
    weight_D= 0 
@@ -124,6 +107,11 @@ def age_DR(record):


 def race_RD(record):
+    '''
+    Use race information in a tract to weight the number of Republicans and Democrats
+    NOTE: We determined how to weight the predictive power of demographics using CNN's
+    2012 exit poll data, found here: http://www.cnn.com/election/2012/results/race/president/
+    '''
    total_population = record[6]
    weight_R = 0
    weight_D = 0 
@@ -154,8 +142,12 @@ def race_RD(record):
    return percent_RD


-
 def gender_RD(record):
+    '''
+    Use gender information in a tract to weight the number of Republicans and Democrats
+    NOTE: We determined how to weight the predictive power of demographics using CNN's
+    2012 exit poll data, found here: http://www.cnn.com/election/2012/results/race/president/
+    '''
    total_population = record[6]
    weight_R = 0
    weight_D = 0 
@@ -183,6 +175,10 @@ def gender_RD(record):


 def get_populations_RD(shapeRecs, connect_tracts, state_set):
+    '''
+    Apply weighting for age, race, and gender to estimate the number of Republicans and Democrats
+    in a tract and return the tract information as a dictionary
+    '''
    tracts_dict = {}
    num_tracts_in_state = len(state_set)
    for num in range(74002):
@@ -224,40 +220,29 @@ def get_populations_RD(shapeRecs, connect_tracts, state_set):



-
-
-# Needs fixing:
-# Set instead of dictionary
-# Sizes
-
-# def create_tract(shapeRecs):
-#     connect_tracts = get_connects(shapeRecs)
-#     print("done with get connects")
-#     dict_tracts = get_populations_RD(shapeRecs,connect_tracts)
-#     print("done with get tracts")
-#     create_json_file(dict_tracts, "tracts" + "_run_1")
-#     print("json created")
-#     return dict_tracts
-
 def create_state_tracts(shapeRecs):
+    '''
+    Recover the per-state tract sets from state_set_json_files/Master_tract_sets.json (created in location_api.py)
+    For each state, build its tract dictionaries (connections plus population data) and write them to state_tracts_full_json_files
+    Apply state_tracts_hard_coded to the combined result, write it as Master_tract_dicts, and return it
+    '''
    state_tracts = {}
    state_dict_of_sets = recover_json("state_set_json_files/" + "Master_tract_sets.json")
    for state in state_dict_of_sets:
-        print(state, len(state_dict_of_sets[state]))
        state_set = state_dict_of_sets[state]
        connect_tracts = get_full_connects_state(shapeRecs, state_set)
-        print("done with get connects")
        dict_tracts = get_populations_RD(shapeRecs,connect_tracts, state_set)
-        print("done with get tracts")
        state_tracts[state] = dict_tracts
        create_json_file(dict_tracts, "state_tracts_full_json_files/" + state + "tract_dict")
-        print("json created")
    Master_state_tracts = state_tracts_hard_coded(state_tracts)
    create_json_file(Master_state_tracts, "state_tracts_full_json_files/" + "Master_tract_dicts")
-    print("Master and state json hardcoded")
    return Master_state_tracts

 def state_tracts_hard_coded(Master_tract_dicts):
+    '''
+    Hard code connections for islands in certain states to ensure that there
+    are not unconnected tracts  
+    '''
    hc_islands = { "California": {"06075980401": "06041132200",
                    "06037599100": "06037990300" }, 
                "Florida": {"12087980100": "12087990000" }, 

--- a/reading_shapes/get_connections.py
+++ b/reading_shapes/get_connections.py
-import shapefile
-
-
-# shapefile_path = "test_files/s_11au16.dbf"
-shapefile_path = "test_files/s_11au16.shp"
-# shapefile_path = "Census_tract/Tract_2010Census_DP1.shp"
-
-sf = shapefile.Reader(shapefile_path)
-shapeRecs = sf.shapeRecords()
-
-def get_coordinates_connects(shapeRecs):
-    sf_connect = {}
-    sf_dict = {}
-    for num in range(52):
-        print(num)
-        # rec = int(shapeRecs[num].record[2])
-        # print(shapeRecs[num].record)
-        rec = shapeRecs[num].record[0]
-        points = shapeRecs[num].shape.points
-        for point in points:
-            if point in sf_dict:
-                list_num = sf_dict[point]
-                for past_num in list_num:
-                    if not past_num in sf_dict and not (past_num == rec):
-                    # if not past_num in sf_dict and (past_num-rec):
-                        sf_connect[(rec,past_num)] = ' '
-                        sf_connect[(past_num,rec)] = ' '
-                sf_dict[point] += [rec]
-            elif point not in sf_dict:
-                sf_dict[point] = [rec]
-    return sf_connect
-
-def get_connects_traits(sf_connect):
-    count = 0
-    sf_traits = {}
-    for sh_num1, sh_num2 in sf_connect:
-        if sh_num1 not in sf_traits:
-            sf_traits[sh_num1] = [sh_num2]
-            count += 1
-        if sh_num1 in sf_traits:
-            if sh_num2 not in sf_traits[sh_num1]:
-                sf_traits[sh_num1].append(sh_num2)
-    return sf_traits
-
-def get_connects(shapeRecs):
-    sf_connect = get_coordinates_connects(shapeRecs)
-    sf_traits = get_connects_traits(sf_connect)
-    sf_connect = {}
-    return sf_traits
-
-# import shapefile
-
-
-# shapefile_path = "test_files/s_11au16.dbf"
-# # shapefile_path = "test_files/s_11au16.shp"
-
-# sf = shapefile.Reader(shapefile_path)
-# shapes = sf.shapes()
-
-# sf_connect = {}
-# sf_dict = {}
-# for num in range(57):
-#     points = shapes[num].points
-#     for point in points:
-#         if point in sf_dict:
-#             past_num = sf_dict[point]
-#             if not past_num in sf_dict and (num - past_num):
-#                 sf_connect[(num,past_num)] = ' '
-#                 sf_connect[(past_num,num)] = ' '
-#         elif point not in sf_dict:
-#             sf_dict[point] = num
-
-# count = 0
-# sf_children = {}
-# for sh_num1, sh_num2 in sf_connect:
-#     if (sh_num1 - sh_num2):
-#         if sh_num1 not in sf_children:
-#             sf_children[sh_num1] = [sh_num2]
-#             count += 1
-#         if sh_num1 in sf_children:
-#             if sh_num2 not in sf_children[sh_num1]:
-#                 sf_children[sh_num1].append(sh_num2)
-
-# print(sf_children)
-
-# # A few problems:
-# # Multiple items on each 
-# # Set instead of a dictionary
-# # If no connections add to list 
-
-
-
-# import shapefile
-
-
-# shapefile_path = "test_files/s_11au16.dbf"
-# # shapefile_path = "test_files/s_11au16.shp"
-
-# sf = shapefile.Reader(shapefile_path)
-# shapes = sf.shapes()
-
-# sf_connect = {}
-# sf_dict = {}
-# for num in range(57):
-#     points = shapes[num].points
-#     for point in points:
-#         if point in sf_dict:
-#             list_num = sf_dict[point]
-#             for past_num in list_num:
-#                 if not past_num in sf_dict and (num - past_num):
-#                     sf_connect[(num,past_num)] = ' '
-#                     sf_connect[(past_num,num)] = ' '
-#             sf_dict[point] += [num]
-#         elif point not in sf_dict:
-#             sf_dict[point] = [num]
-
-# count = 0
-# sf_children = {}
-# for sh_num1, sh_num2 in sf_connect:
-#     if (sh_num1 - sh_num2):
-#         if sh_num1 not in sf_children:
-#             sf_children[sh_num1] = [sh_num2]
-#             count += 1
-#         if sh_num1 in sf_children:
-#             if sh_num2 not in sf_children[sh_num1]:
-#                 sf_children[sh_num1].append(sh_num2)
-
-# print(sf_children)
-
-
-
-# import shapefile
-
-
-# shapefile_path = "test_files/s_11au16.dbf"
-# # shapefile_path = "test_files/s_11au16.shp"
-
-# sf = shapefile.Reader(shapefile_path)
-# shapeRecs = sf.shapeRecords()
-
-# sf_connect = {}
-# sf_dict = {}
-# for num in range(57):
-#     # rec = int(shapeRecs[num].record[2])
-#     # print(shapeRecs[num].record)
-#     rec = shapeRecs[num].record[0]
-#     points = shapeRecs[num].shape.points
-#     for point in points:
-#         if point in sf_dict:
-#             list_num = sf_dict[point]
-#             for past_num in list_num:
-#                 if not past_num in sf_dict and not (past_num == rec):
-#                 # if not past_num in sf_dict and (past_num-rec):
-#                     sf_connect[(rec,past_num)] = ' '
-#                     sf_connect[(past_num,rec)] = ' '
-#             sf_dict[point] += [rec]
-#         elif point not in sf_dict:
-#             sf_dict[point] = [rec]
-
-# count = 0
-# sf_children = {}
-# for sh_num1, sh_num2 in sf_connect:
-#     if sh_num1 not in sf_children:
-#         sf_children[sh_num1] = [sh_num2]
-#         count += 1
-#     if sh_num1 in sf_children:
-#         if sh_num2 not in sf_children[sh_num1]:
-#             sf_children[sh_num1].append(sh_num2)
-
-# print(sf_children)
-
-
-# Needs fixing:
-# Set instead of dictionary
-# Sizes
-# Putting items that do not connect 
\ No newline at end of file
--- a/reading_shapes/get_population.py
+++ b/reading_shapes/get_population.py
-import shapefile
-
-
-shapefile_path = "Census_tract/Tract_2010Census_DP1.shp"
-# shapefile_path = "test_files/s_11au16.shp"
-
-sf = shapefile.Reader(shapefile_path)
-
-
-def age_DR(record):
-    total_population = record[6]
-    weight_R = 0
-    weight_D= 0 
-    
-
-    pop_18_24 = record[10]*.75 + record[11]
-    weight_D += pop_18_24 * -0.25
-
-    pop_25_29 = record[12]
-    weight_D += pop_25_29 * -0.22
-
-    pop_30_39 = sum(record[13:15])
-    weight_D += pop_30_39 * -0.13
-
-    pop_40_49 = sum(record[15:17])
-    weight_R += pop_40_49 * 0.02
-
-    pop_50_64 = sum(record[17:20])
-    weight_R += pop_50_64 * 0.05
-
-    pop_over_65 = sum(record[20:25])
-    weight_R += pop_over_65 * 0.12
-
-    total_population_above_16 = pop_18_24 + pop_25_29 + pop_30_39 + pop_40_49 + pop_50_64 + pop_over_65
-    if not total_population_above_16:
-        return 0.0 
-    population_change = 1 - (total_population_above_16/total_population) + 1
-    weight_D = weight_D * population_change
-    weight_R = weight_R * population_change
-    add_weights = weight_R + weight_D
-    if not add_weights:
-        return 0.0
-    percent_RD =((add_weights)/(abs(weight_D)+weight_R))
-
-    return percent_RD
-
-
-def race_RD(record):
-    total_population = record[6]
-    weight_R = 0
-    weight_D= 0 
-
-    pop_white = record[83]
-    weight_R += pop_white * 0.20
-
-    pop_african_american = record[84]
-    weight_D += pop_african_american * -0.88
-
-    pop_latino = record[112]
-    weight_D += pop_latino * -0.45
-
-    pop_asian = record[86]
-    weight_D += pop_asian * -0.21
-
-
-
-    total_population_non_other = pop_white + pop_african_american + pop_latino + pop_asian
-    if not total_population_non_other:
-        return 0.0
-    population_change = 1 - (total_population_non_other/total_population) + 1
-    weight_D = weight_D * population_change
-    weight_R = weight_R * population_change
-    add_weights = weight_R + weight_D
-    if not add_weights:
-        return 0.0
-    percent_RD =((add_weights)/(abs(weight_D)+weight_R))
-
-    return percent_RD
-
-def gender_RD(record):
-    total_population = record[6]
-    weight_R = 0
-    weight_D = 0 
-    
-
-    pop_male = record[67]
-    weight_R += pop_male * 0.07
-
-    pop_female = record[68]
-    weight_D += pop_female * -0.12
-
-    total_population_above_16 = pop_female + pop_male
-    if not total_population_above_16:
-        return 0.0 
-    population_change = 1 - (total_population_above_16/total_population) + 1
-    weight_D = weight_D * population_change
-    weight_R = weight_R * population_change
-    add_weights = weight_R + weight_D
-    if not add_weights:
-        return 0.0
-    percent_RD =((add_weights)/(abs(weight_D)+weight_R))
-
-    return percent_RD
-
-
-
-def test_record(sf):
-    records = sf.records()
-    print(5)
-    tracts_dict = {}
-    for record in records:
-        record_dict = {}
-        total_population = record[6]
-        if total_population:
-            gender = gender_RD(record)
-
-            race = race_RD(record)
-
-            age = age_DR(record)
-
-            RD_final = ((gender+race+age)/3)/2
-            if RD_final > 0:
-                reps = (.5 + RD_final)* total_population
-                dems = (.5 - RD_final) * total_population
-            elif RD_final <= 0:
-                dems = (.5 + abs(RD_final))*total_population
-                reps = (.5 - abs(RD_final)) *total_population
-
-            record_dict["total_population"] = total_population
-            record_dict["D"] = dems
-            record_dict["R"] = reps
-        elif not total_population:
-            record_dict["total_population"] = 0
-            record_dict["D"] = 0
-            record_dict["R"] = 0
-        record_dict["lat"] = record[4]
-        record_dict["lon"] = record[5]
-
-        tracts_dict[record[0]] = record_dict
-    return tracts_dict
-
-
-
-
-
-
-# num_of_shape1 = 100000
-# def test_shape(sf)
-#     for shape_index_num in range(len):
-#     s = sf.shape(shape_index_num)
-
-
-#     count += 1
-#     # w = shapefile.Writer(s)
-#     # print(w)
-#     # w.field('FIRST_FLD','C','40')
-#     print(count)
-#     # w.record('First','Point')
--- a/reading_shapes/location_api.py
+++ b/reading_shapes/location_api.py
@@ -6,13 +6,14 @@ from util import create_json_file, recover_json
 API_KEY = "6865b163d2db4c501f40"

 def tracts_state(API_KEY):
+    '''
+    Use the GPS coordinates of tracts to determine which state they are in
+    To translate GPS coordinates into states, we used the LocationIQ API, found here: http://locationiq.org/
+    '''
    dict_tracts = recover_json("tracts1.json")
    dict_states = recover_json("statesrun_5.json")
-    print("statesrun_5.json")
    balance = check_balance(API_KEY)
-    print(balance)
    empty = []
-    # dict_states = {}
    count = 0
    for tract in dict_tracts:
        if tract not in dict_states:
@@ -23,7 +24,6 @@ def tracts_state(API_KEY):
            if _lat[0] == "+":
                _lat = _lat[1:]
            request_URL = "http://locationiq.org/v1/reverse.php?format=json&key=" + API_KEY + "&lat=" + _lat + "&lon=" + _lon + "&zoom=16"
-            print(request_URL, count)
            location_info = requests.get(request_URL)
            location_info_text = location_info.text
            location_info_dict = ast.literal_eval(location_info_text)
@@ -47,8 +47,10 @@ def tracts_state(API_KEY):


 def check_balance(API_KEY):
+    '''
+    Check how many more times we could use the LocationIQ API in a day
+    '''
    request_URL = "http://locationiq.org/v1/balance.php?key=" + API_KEY
-    print(request_URL)
    balance_request = requests.get(request_URL)
    balance_text = balance_request.text
    balance_dict = ast.literal_eval(balance_text)
@@ -57,6 +59,9 @@ def check_balance(API_KEY):
    return final_balance

 def hard_code(output=False):
+    '''
+    Hard code island tracts that were not properly attributed to their respective states
+    '''
    dict_states = recover_json("statesrun_6.json")
    hc_dict = {
    "06083001800": "California",
@@ -79,6 +84,11 @@ def hard_code(output=False):
    return dict_states

 def create_state_sets(output=False):
+    '''
+    Create a json file with dictionaries for each tract as a Master file
+    Create additional json files sorted by state
+    Store files in state_set_json_files
+    '''
    dict_states_w_tracts = {}
    dict_states = recover_json("states_hc.json")
    for tract, state in dict_states.items():