Census API and Data Collection

Code
# Import packages

import altair as alt
import geopandas as gpd
import pandas as pd
import numpy as np
import hvplot.pandas
#import seaborn as sns
from matplotlib import pyplot as plt
import holoviews as hv
from shapely.geometry import Polygon
from shapely.geometry import MultiPolygon
import requests
import geoviews as gv
import geoviews.tile_sources as gvts
import folium
from folium import plugins
from shapely.geometry import Point
import xyzservices
import osmnx as ox
import networkx as nx
import pygris
import cenpy



%matplotlib inline

# Show more rows and wider columns when displaying dataframes
pd.options.display.max_rows = 9999
pd.options.display.max_colwidth = 200

# Hide warnings due to issue in shapely package 
# See: https://github.com/shapely/shapely/issues/1345
np.seterr(invalid="ignore");

Census API Data

In this section, an API query retrieves demographic data for San Francisco from the 2021 American Community Survey (ACS) 5-year estimates. The variables selected for analysis are the white population, the Hispanic or Latino population, median household income, and the population that commutes by driving; after examining several candidate variables, these were judged most relevant for understanding socioeconomic and commuting patterns. The data are pulled at the tract level to capture localized nuances and then joined to the working dataset so that parking trends can be examined alongside demographic patterns at a granular level.
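A minimal sketch of such a query using cenpy is shown below. The dataset name and the income and race/ethnicity variable codes follow the tables used later in this notebook; the driving-commute code ("B08301_003E", workers who drove alone) is an assumption included for illustration.

Code
import cenpy

# Connect to the ACS 5-year (2021) detailed tables
acs = cenpy.remote.APIConnection("ACSDT5Y2021")

variables = [
    "NAME",
    "B19013_001E",  # median household income
    "B03002_003E",  # Not Hispanic, White
    "B03002_012E",  # Hispanic or Latino
    "B08301_003E",  # drove alone to work (assumed code)
]

ca_state_code = "06"    # California
sf_county_code = "075"  # San Francisco County

# Query all census tracts in San Francisco County
sf_acs = acs.query(
    cols=variables,
    geo_unit="tract:*",
    geo_filter={"state": ca_state_code, "county": sf_county_code},
)

# The Census API returns strings; cast the numeric columns to floats
for variable in variables:
    if variable != "NAME":
        sf_acs[variable] = sf_acs[variable].astype(float)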

Code
#available = cenpy.explorer.available()
#available.head()

# Return a dataframe of all datasets that start with "ACS"
# Axis=0 means to filter the index labels!
#acs = available.filter(regex="^ACS", axis=0)

# Return a dataframe of all datasets that start with "ACSDT5Y"
#available.filter(regex="^ACSDT5Y", axis=0)
#acs = cenpy.remote.APIConnection("ACSDT5Y2019")
#acs.variables.head(n=100)


#looking for variables

#income_matches = acs.varslike(
#    pattern="MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS",
#    by="concept",  # searches along concept column
#).sort_index()

#race_matches

#race_matches = acs.varslike(
#    pattern="WHITE",
#    by="concept",  # searches along concept column
#).sort_index()

#race_matches

#drive choice

#drive_matches = acs.varslike(
#    pattern="transportation",
#    by="concept",  # searches along concept column
#).sort_index()

#drive_matches
Code
#variables = [
#    "NAME",
#    "B19013_001E",  # median household income
#    "B03002_001E",  # Total
#    "B03002_003E",  # Not Hispanic, White
#    "B03002_004E",  # Not Hispanic, Black
#    "B03002_005E",  # Not Hispanic, American Indian
#    "B03002_006E",  # Not Hispanic, Asian
#    "B03002_007E",  # Not Hispanic, Native Hawaiian
#    "B03002_008E",  # Not Hispanic, Other
#    "B03002_009E",  # Not Hispanic, Two or More Races
#    "B03002_012E"]  # Hispanic or Latino

# Variables retrieved separately via R (see the note below):
#     Med_Age = B01002_001E,
#     White_Pop = B02001_002E,
#     Travel_Time = B08013_001E,
#     Num_Commuters = B08012_001E,
#     Means_of_Transport = B08301_001E,
#     Total_Public_Trans = B08301_010E,
#     workforce_16 = B08007_001E,
#     Num_Vehicles = B06012_002E,


#counties = cenpy.explorer.fips_table("COUNTY")
#counties.head()

# Search for rows where name contains "San Francisco"
#counties.loc[ counties[3].str.contains("San Francisco") ]

#sf_county_code = "075"
#ca_state_code = "06"

#sf_inc_data = acs.query(
#    cols=variables,
#    geo_unit="block group:*",
#    geo_filter={"state": ca_state_code, "county": sf_county_code, "tract": "*"},
#)


#sf_inc_data.head(700)
Note

At this point in the analysis, we were able to collect the race and income variables from the Census API, but ran into errors when trying to include additional variables involving drive time to work and preferred mode of transportation. To work around this, we performed that Census API call in R and joined the resulting data to our existing dataset. The variables we were unable to retrieve here are listed in the commented-out code chunk above. The R script used is available in the project repository.
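For reference, a hedged sketch of how the R output could be joined back to the Python data is shown below; the CSV file name and the "GEOID" join key are assumptions for illustration, since the actual join was performed in the R script.

Code
import pandas as pd
import geopandas as gpd

# Hypothetical CSV exported by the R script, keyed on the census GEOID
r_vars = pd.read_csv("./data/acs_commute_vars.csv", dtype={"GEOID": str})

# Existing census layer used in this notebook
census = gpd.read_file("./data/census.geojson")

# Attribute join on the shared GEOID key (assumed to exist in both tables)
census = census.merge(r_vars, on="GEOID", how="left")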

Code
#convert to float 

#for variable in variables:
#    # Convert all variables EXCEPT for NAME
#    if variable != "NAME":
#        sf_inc_data[variable] = sf_inc_data[variable].astype(float)
        
        
Code
#merges
#sf_inc_data.rename(columns={
#        "B19013_001E": "Median Income",
#        "B03002_001E": "Total",     # Total population
#        "B03002_003E": "White",     # Not Hispanic, White
#        "B03002_004E": "Black",     # Not Hispanic, Black
#        "B03002_005E": "AI/AN",     # Not Hispanic, American Indian
#        "B03002_006E": "Asian",     # Not Hispanic, Asian
#        "B03002_007E": "NH/PI",     # Not Hispanic, Native Hawaiian
#        "B03002_008E": "Other_",    # Not Hispanic, Other
#        "B03002_009E": "Two Plus",  # Not Hispanic, Two or More Races
#        "B03002_012E": "Hispanic",  # Hispanic or Latino
#    }, inplace=True)

# Keep only rows with a valid (positive) median income
#sf_inc_data = sf_inc_data.loc[sf_inc_data['Median Income'] > 0]

# Pull San Francisco block group boundaries (California FIPS "06", San Francisco County FIPS "075")
sf_block_groups = pygris.block_groups(state="06", county="075", year=2021)

# sf_final = sf_block_groups.merge(
#     sf_inc_data,
#     left_on=["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE"],
#     right_on=["state", "county", "tract", "block group"],
# )

# Writing the geojson to use in R
# sf_final.to_file(output_file, driver='GeoJSON')

# Bringing back the complete dataset (exported from R)

sf_final = gpd.read_file("./data/census.geojson")

# Attach block group geometries via a spatial join
sf_final = gpd.sjoin(sf_final, sf_block_groups, how="inner", predicate="intersects")

# Drop the TIGER/Line attribute columns that are not needed for the analysis
columns_to_drop = [
    'STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE', 'GEOID', 'NAMELSAD',
    'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER', 'INTPTLAT', 'INTPTLON', 'index_right',
]

sf_final.drop(columns=columns_to_drop, inplace=True)

#sf_final.head()

#print(type(sf_final))
Code
#columns_to_drop = ['index_right']

#sf_final.drop(columns=columns_to_drop, inplace=True)

#sf_final.head()

#column_names = sf_final.columns.tolist()
#print(column_names)

Exploratory Analysis of Census Variables

First, the median income map is examined to discern patterns, if any, between neighborhood wealth and parking meter density. It is hard to draw meaningful conclusions from this map alone, because the street and parking meter data still need to be joined to the census geographies to see where the overlaps occur; a sketch of that spatial join follows the map below.

Data by Census Tract
Code
# Interactive map of median income by census geography
sf_final.explore(column="Med_Inc", tiles="cartodbdark_matter")
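As noted above, the parking meter data still need to be joined to these census geographies. Below is a hedged sketch of that join, counting meters per census polygon; the parking_meters file name is a hypothetical placeholder for illustration, not the project's actual data source.

Code
import geopandas as gpd

# Hypothetical point layer of parking meters (file name assumed for illustration)
parking_meters = gpd.read_file("./data/parking_meters.geojson")

# Reproject the points to match the census layer before the spatial join
parking_meters = parking_meters.to_crs(sf_final.crs)

# Assign each meter to the census polygon it falls within
meters_in_polys = gpd.sjoin(parking_meters, sf_final, how="inner", predicate="within")

# Count meters per polygon and attach the counts back to the census data
meter_counts = meters_in_polys.groupby("index_right").size().rename("meter_count")
sf_final = sf_final.join(meter_counts).fillna({"meter_count": 0})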