Commit 205b0505 by Dave Foote

Delete pa1.py

parent bce7d253
Showing 1 changed file with 0 additions and 88 deletions
'''
Programming Assignment #1: Diagnostic
Dave Foote
'''
import pandas as pd
import matplotlib as plt
from sodapy import Socrata
from shapely.geometry import Point, Polygon, MultiPolygon
#data download helpers
def get_full_dataset(data_set_id, limit=50000):
    '''
    Download every record of a dataset from the Chicago Open Data Portal.

    inputs: data_set_id, the Socrata id of the dataset to download
            limit, the number of rows requested per call (default 50,000)
    output: a single list holding the records of every page, in the order
            the pages were downloaded
    '''
    domain = 'data.cityofchicago.org'
    # second argument is None, i.e. no app token is supplied
    client = Socrata(domain, None)
    rv = []
    offset = 0
    try:
        new_downloads = make_new_downloads(data_set_id, client, limit, offset)
        # an empty page means the previous request already reached the end
        while new_downloads:
            rv.extend(new_downloads)
            offset += limit
            new_downloads = make_new_downloads(data_set_id, client, limit,
                                               offset)
    finally:
        # always close the client, even when one of the requests fails
        client.close()
    return rv
def make_new_downloads(data_set_id, client, limit, offset):
    '''
    Fetch one page of records from a dataset.

    inputs: data_set_id, the id of the dataset to read
            client, an object whose get method performs the download
            limit, the maximum number of rows in the page
            offset, the number of rows to skip before the page starts
    output: whatever client.get returns for that page
    '''
    # NOTE(review): no explicit row ordering is requested; confirm that the
    # API returns pages in a stable order when paging with limit/offset
    paging = {'limit': limit, 'offset': offset}
    return client.get(data_set_id, **paging)
def create_df(list_of_dicts):
    '''
    Turn a list of record dictionaries into a pandas DataFrame.

    inputs: list_of_dicts, one dictionary per record
    output: the DataFrame pandas builds from those records
    '''
    frame = pd.DataFrame(data=list_of_dicts)
    return frame
def get_coordinates(df):
    '''
    Build a GeoPandas series of points from a dataframe's coordinates.

    inputs: df with longitude and latitude columns whose values float()
            can parse (e.g. strings)
    output: a GeoSeries holding one shapely Point(longitude, latitude) per
            row of df, in row order
    '''
    # geopandas is not among the file's visible top-level imports, so it is
    # imported here; without this the call below raises NameError
    import geopandas

    longs = [float(x) for x in df.longitude]
    lats = [float(x) for x in df.latitude]
    # x is the longitude and y is the latitude
    return geopandas.GeoSeries(
        [Point(lon, lat) for lon, lat in zip(longs, lats)])
def get_tracts(df_info, df_tract):
    '''
    Placeholder for assigning census tracts to records.

    inputs: df_info, a dataframe with records you'd like to assign tracts to
            df_tract, a dataframe with boundaries of census tracts
    output: None -- the function has no implementation yet
    '''
    return None
# get the data: one Socrata dataset id per year, each downloaded in full
id_17 = 'd62x-nvdr'
id_18 = '3i3m-jwuy'
df_17 = create_df(get_full_dataset(id_17))
df_18 = create_df(get_full_dataset(id_18))

# summary statistics:
print("Part 1: Statistical Summary")
print('10 Most Common Chicago Crimes in 2017:\n',
      df_17.primary_type.value_counts().head(10))
print('10 Most Common Chicago Crimes in 2018:\n',
      df_18.primary_type.value_counts().head(10))
print('10 Neighborhoods with Highest Volume of Crime in 2017:\n',
      df_17.community_area.value_counts().head(10))
print('10 Neighborhoods with Highest Volume of Crime in 2018:\n',
      df_18.community_area.value_counts().head(10))

# counts of each distinct value in the arrest column, per year
arrest_rates_17 = df_17.arrest.value_counts()
arrest_rates_18 = df_18.arrest.value_counts()
# NOTE(review): this divides the count stored under key 1 by the count stored
# under key 0, not by the total number of records -- confirm that this ratio
# is the intended "arrests per stop" figure and that keys 1/0 select the
# arrest / no-arrest counts for the dtype of the arrest column
aps_17 = arrest_rates_17[1] / arrest_rates_17[0]
aps_18 = arrest_rates_18[1] / arrest_rates_18[0]
print('Arrests Per Stop in 2017:\n', aps_17)
print('Arrests Per Stop in 2018:\n', aps_18)
print('Change in Arrest Per Stop Rate from 17-18:\n', (aps_18 - aps_17))

# pull in census tract data from chicago data portal:
id_tract = '74p9-q2aq'
# the helper is named get_full_dataset; the previous spelling
# (get_full_datset) raised NameError
df_tract_info = create_df(get_full_dataset(id_tract))
'''
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment