Delete pa1.py

205b0505 · Dave Foote · bce7d253 · bce7d253
Commit 205b0505 authored Apr 16, 2019 by Dave Foote
Showing with 0 additions and 88 deletions
pa1.py
--- a/pa1.py
+++ b/pa1.py
-'''
-Programming Assignment #1: Diagnostic
-
-Dave Foote
-'''
-import pandas as pd
-import matplotlib as plt
-from sodapy import Socrata
-from shapely.geometry import Point, Polygon, MultiPolygon
-
-#data download helpers
-def get_full_dataset(data_set_id):
-    '''
-    Download every row of one dataset from the Chicago Open Data Portal.
-
-    inputs: data_set_id, the dataset identifier handed to the Socrata client
-    output: list with the records of every page returned by
-    make_new_downloads, requested 50,000 at a time until a page comes back
-    empty
-    '''
-    domain = 'data.cityofchicago.org'
-    limit = 50000
-    offset = 0
-    rv = []
-
-    client = Socrata(domain, None)
-    try:
-        new_downloads = make_new_downloads(data_set_id, client, limit, offset)
-        while new_downloads:
-            rv.extend(new_downloads)
-            offset += limit
-            new_downloads = make_new_downloads(data_set_id, client, limit,
-                                               offset)
-    finally:
-        #release the client's HTTP session even if a request fails part-way
-        client.close()
-
-    return rv
-        
-def make_new_downloads(data_set_id, client, limit, offset):
-    '''
-    Fetch a single page of a dataset.
-
-    inputs: data_set_id (dataset identifier), client (object exposing get(),
-    such as a sodapy Socrata client), limit (maximum rows in the page),
-    offset (number of rows to skip before the page starts)
-    output: whatever client.get returns for that page
-    '''
-    #paging without an explicit sort is not guaranteed to be stable, so rows
-    #could be repeated or skipped between pages; ':id' is Socrata's system
-    #row identifier
-    return client.get(data_set_id, limit=limit, offset=offset, order=':id')
-
-def create_df(list_of_dicts):
-    '''
-    inputs: list of record dictionaries
-    output: pandas DataFrame built from those records
-    '''
-    frame = pd.DataFrame(data=list_of_dicts)
-    return frame
-
-def get_coordinates(df):
-    '''
-    inputs: df with string lat/long columns (read as df.longitude and
-    df.latitude)
-    output: GeoPandas GeoSeries holding one shapely Point(longitude,
-    latitude) per row of df, in row order
-    '''
-    #imported here because the file's import block never brings geopandas
-    #into scope, which made the original call fail with a NameError
-    import geopandas
-
-    longs = [float(x) for x in df.longitude]
-    lats = [float(x) for x in df.latitude]
-
-    return geopandas.GeoSeries(
-        [Point(lon, lat) for lon, lat in zip(longs, lats)])
-
-def get_tracts(df_info, df_tract):
-    '''
-    Placeholder for assigning census tracts to records.
-
-    inputs: df_info, a dataframe with records you'd like to assign tracts to;
-    df_tract, a dataframe with boundaries of census tracts
-    output: None (no tract assignment is implemented)
-    '''
-    return None
-#get the data
-#dataset identifiers used for the 2017 and 2018 crime downloads
-id_17 = 'd62x-nvdr'
-id_18 = '3i3m-jwuy'
-
-df_17 = create_df(get_full_dataset(id_17))
-df_18 = create_df(get_full_dataset(id_18))
-
-#summary statistics:
-print("Part 1: Statistical Summary")
-print('10 Most Common Chicago Crimes in 2017:\n',
-      df_17.primary_type.value_counts().head(10))
-print('10 Most Common Chicago Crimes in 2018:\n',
-      df_18.primary_type.value_counts().head(10))
-print('10 Neighborhoods with Highest Volume of Crime in 2017:\n',
-      df_17.community_area.value_counts().head(10))
-print('10 Neighborhoods with Highest Volume of Crime in 2018:\n',
-      df_18.community_area.value_counts().head(10))
-
-#counts of each distinct value in the arrest column, per year
-arrest_rates_17 = df_17.arrest.value_counts()
-arrest_rates_18 = df_18.arrest.value_counts()
-#NOTE(review): this divides the count of one arrest value by the count of
-#the other, not by the total number of records -- confirm that this ratio
-#is what "Arrests Per Stop" is meant to report
-aps_17 = arrest_rates_17[1] / arrest_rates_17[0]
-aps_18 = arrest_rates_18[1] / arrest_rates_18[0]
-
-print('Arrests Per Stop in 2017:\n', aps_17)
-print('Arrests Per Stop in 2018:\n', aps_18)
-print('Change in Arrest Per Stop Rate from 17-18:\n', (aps_18 - aps_17))
-
-#pull in census tract data from chicago data portal:
-id_tract = '74p9-q2aq'
-df_tract_info = create_df(get_full_dataset(id_tract))
-
-
-
-