# Import packages
import altair as alt
import geopandas as gpd
import pandas as pd
import numpy as np
import hvplot.pandas
#import seaborn as sns
from matplotlib import pyplot as plt
import holoviews as hv
from shapely.geometry import Polygon, MultiPolygon, Point
import requests
import geoviews as gv
import geoviews.tile_sources as gvts
import folium
from folium import plugins
import xyzservices
import osmnx as ox
import networkx as nx
import pygris
import cenpy

%matplotlib inline

# See lots of columns
pd.options.display.max_rows = 9999
pd.options.display.max_colwidth = 200

# Hide warnings due to issue in shapely package
# See: https://github.com/shapely/shapely/issues/1345
np.seterr(invalid="ignore");
Census API Data
In this section, an API query retrieves demographic data for San Francisco from the American Community Survey (ACS) 5-year estimates for 2021. The variables selected for analysis are the white population, the Hispanic or Latino population, median household income, and the population that commutes by driving; after examining a wider set of candidates, these were judged most useful for understanding socioeconomic and commuting patterns. The data are pulled at the census block group level to capture localized nuances and are then joined to the working dataset so that parking trends can be examined alongside demographic patterns at a granular level.
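A minimal sketch of the cenpy query pattern described above is shown below. It is illustrative only; the variable codes and FIPS codes match those used in the exploratory chunks that follow (median household income, non-Hispanic white, and Hispanic or Latino).

```python
# Illustrative sketch of the ACS query described above (not the full workflow).
# Variable codes and FIPS codes match those used in the chunks below.
acs = cenpy.remote.APIConnection("ACSDT5Y2021")

demo_vars = [
    "NAME",
    "B19013_001E",  # median household income
    "B03002_003E",  # not Hispanic, white
    "B03002_012E",  # Hispanic or Latino
]

sf_demo = acs.query(
    cols=demo_vars,
    geo_unit="block group:*",
    geo_filter={"state": "06", "county": "075", "tract": "*"},  # CA, San Francisco
)
```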
Code
# available = cenpy.explorer.available()
# available.head()

# Return a dataframe of all datasets that start with "ACS"
# Axis=0 means to filter the index labels!
# acs = available.filter(regex="^ACS", axis=0)

# Return a dataframe of all datasets that start with "ACSDT5Y"
# available.filter(regex="^ACSDT5Y", axis=0)

# acs = cenpy.remote.APIConnection("ACSDT5Y2019")
# acs.variables.head(n=100)

# looking for income variables
# income_matches = acs.varslike(
#     pattern="MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS",
#     by="concept",  # searches along concept column
# ).sort_index()
# income_matches

# looking for race variables
# race_matches = acs.varslike(
#     pattern="WHITE",
#     by="concept",  # searches along concept column
# ).sort_index()
# race_matches

# drive choice
# drive_matches = acs.varslike(
#     pattern="transportation",
#     by="concept",  # searches along concept column
# ).sort_index()
# drive_matches
Code
# variables = [
#     "NAME",
#     "B19013_001E",  # med inc
#     "B03002_001E",  # Total
#     "B03002_003E",  # Not Hispanic, White
#     "B03002_004E",  # Not Hispanic, Black
#     "B03002_005E",  # Not Hispanic, American Indian
#     "B03002_006E",  # Not Hispanic, Asian
#     "B03002_007E",  # Not Hispanic, Native Hawaiian
#     "B03002_008E",  # Not Hispanic, Other
#     "B03002_009E",  # Two or More Races
#     "B03002_012E",  # hisp
# ]

# Med_Age = B01002_001E,
# White_Pop = B02001_002E,
# Travel_Time = B08013_001E,
# Num_Commuters = B08012_001E,
# Means_of_Transport = B08301_001E,
# Total_Public_Trans = B08301_010E,
# workforce_16 = B08007_001E,
# Num_Vehicles = B06012_002E,

# counties = cenpy.explorer.fips_table("COUNTY")
# counties.head()

# Search for rows where name contains "San Francisco"
# counties.loc[counties[3].str.contains("San Francisco")]

# sf_county_code = "075"
# ca_state_code = "06"

# sf_inc_data = acs.query(
#     cols=variables,
#     geo_unit="block group:*",
#     geo_filter={"state": ca_state_code, "county": sf_county_code, "tract": "*"},
# )
# sf_inc_data.head(700)
Note
At this point in our analysis, we were able to collect the race and income variables from the Census API, but ran into errors when trying to include additional variables involving drive time to work and preferred mode of transportation. To work around this, we performed the Census API call in R and joined that data to our existing dataset. The variables we were unable to pull are commented out in the code chunk above. The R script used is available in the project repository.
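For illustration only, the commute-related tables retrieved in R could also be requested directly from the Census API in Python with the requests library imported at the top of this notebook. This is a hedged sketch, not the script we used; the variable codes are the ones listed in the commented chunk above, and an API key may be needed for repeated queries.

```python
# Illustrative sketch only: pulling commute variables (aggregate travel time,
# total means of transportation, public transit commuters) straight from the
# Census API for San Francisco County tracts. Our actual call was made in R.
url = "https://api.census.gov/data/2021/acs/acs5"
params = {
    "get": "NAME,B08013_001E,B08301_001E,B08301_010E",
    "for": "tract:*",
    "in": "state:06 county:075",
}
resp = requests.get(url, params=params)
rows = resp.json()                                   # first row is the header
commute = pd.DataFrame(rows[1:], columns=rows[0])
```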
Code
# convert to float
# for variable in variables:
#     # Convert all variables EXCEPT for NAME
#     if variable != "NAME":
#         sf_inc_data[variable] = sf_inc_data[variable].astype(float)
Code
# merges
# sf_inc_data.rename(columns={
#     'B19013_001E': 'Median Income',
#     "B03002_001E": "Total",      # Total
#     "B03002_003E": "White",      # Not Hispanic, White
#     "B03002_004E": "Black",      # Not Hispanic, Black
#     "B03002_005E": "AI/AN",      # Not Hispanic, American Indian
#     "B03002_006E": "Asian",      # Not Hispanic, Asian
#     "B03002_007E": "NH/PI",      # Not Hispanic, Native Hawaiian
#     "B03002_008E": "Other_",     # Not Hispanic, Other
#     "B03002_009E": "Two Plus",   # Not Hispanic, Two or More Races
#     "B03002_012E": "Hispanic",
# }, inplace=True)
# sf_inc_data = sf_inc_data.loc[sf_inc_data['Median Income'] > 0]

# sf_block_groups = pygris.block_groups(
#     state=ca_state_code, county=sf_county_code, year=2021
# )
# sf_final = sf_block_groups.merge(
#     sf_inc_data,
#     left_on=["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"],
#     right_on=["state", "county", "tract", "block group"],
# )

# writing the geojson to use in r
# sf_final.to_file(output_file, driver='GeoJSON')

# bringing back the complete dataset
sf_final = gpd.read_file("./data/census.geojson")
sf_final = gpd.sjoin(sf_final, sf_block_groups, how="inner", op="intersects")

columns_to_drop = [
    'STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE', 'GEOID', 'NAMELSAD',
    'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER', 'INTPTLAT', 'INTPTLON',
    'index_right'
]
sf_final.drop(columns=columns_to_drop, inplace=True)

# sf_final.head()
# print(type(sf_final))
D:\Fall_2023\Python\Mambaforge\envs\musa-550-fall-2023\lib\site-packages\IPython\core\interactiveshell.py:3448: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
if await self.run_code(code, result, async_=asy):
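The FutureWarning above is raised because the `op` keyword of `gpd.sjoin` has been deprecated. On newer geopandas releases the same spatial join can be written with `predicate` instead, as in the sketch below.

```python
# Equivalent spatial join using the current keyword (silences the FutureWarning).
sf_final = gpd.sjoin(sf_final, sf_block_groups, how="inner", predicate="intersects")
```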
First, the median income map is examined to discern any patterns between neighborhood wealth and parking meter density. It is hard to draw meaningful conclusions from this map alone; the street and parking meter data still need to be joined to see where the overlaps occur.
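A minimal sketch of how that median income map can be drawn with hvplot (imported at the top of the notebook) is given below. The "Median Income" column name follows the rename scheme in the commented chunk above and may differ in the GeoJSON produced by the R script.

```python
# Sketch of the median income choropleth referenced above. Assumes the joined
# GeoDataFrame has a "Median Income" column (per the rename scheme shown earlier).
income_map = sf_final.hvplot.polygons(
    c="Median Income",
    geo=True,
    tiles="CartoLight",
    cmap="viridis",
    frame_width=600,
    frame_height=500,
    title="Median Household Income by Block Group, San Francisco (ACS 2021)",
)
income_map
```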