diff --git a/Project.toml b/Project.toml
index d2d7212..419f4af 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,8 +3,16 @@ uuid = "9a9a8258-a423-4c9c-ac3d-7cc63de3c137"
 authors = ["Anshul Singhvi ", "Jacob Zelko ", "and contributors"]
 version = "0.1.0-DEV"
 
+[deps]
+Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8"
+TidierVest = "969b988e-7aed-4820-b60d-bdec252047c4"
+
 [compat]
-julia = "1.6"
+Aqua = "0.8"
+Format = "1.3"
+TidierVest = "0.4.3"
+julia = "1.10"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
diff --git a/src/TigerLine.jl b/src/TigerLine.jl
index 5cfecd3..1a45fd4 100644
--- a/src/TigerLine.jl
+++ b/src/TigerLine.jl
@@ -1,5 +1,16 @@
 module TigerLine
 
-# Write your package code here.
+    using Downloads:
+        download
+    using Format:
+        FormatExpr,
+        printfmt
+    using TidierVest:
+        html_elements,
+        html_table,
+        read_html
+
+    include("constants.jl")
+    include("downloads.jl")
 
 end
diff --git a/src/constants.jl b/src/constants.jl
new file mode 100644
index 0000000..f947bf0
--- /dev/null
+++ b/src/constants.jl
@@ -0,0 +1,125 @@
+
+"""
+Base URL for TIGER/Line data with two parameters for year and layer.
+"""
+const BASE_TIGER_URL = FormatExpr("https://www2.census.gov/geo/tiger/TIGER{}/{}/")
+
+# "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_5m.zip"
+
+"""
+A dictionary mapping human-readable keys to TIGER/Line dataset codes and their associated descriptions.
+
+> **Source:** `https://www2.census.gov/geo/tiger/TIGER2017/2017_TL_Shapefiles_File_Name_Definitions.pdf`
+
+## Keys
+
+- `"address_range_rel"` (**ADDR**) - Address Range Relationship File
+- `"address_range_feat"` (**ADDRFEAT**) - Address Range Feature
+- `"address_range_name_rel"` (**ADDRFN**) - Address Range-Feature Name Relationship
+- `"native_areas"` (**AIANNH**) - American Indian / Alaska Native / Native Hawaiian Areas
+- `"tribal_subdivision_nat"` (**AITSN**) - American Indian Tribal Subdivision National
+- `"alaska_native_region"` (**ANRC**) - Alaska Native Regional Corporation
+- `"area_landmark"` (**AREALM**) - Area Landmark
+- `"area_water"` (**AREAWATER**) - Area Hydrography
+- `"block_group"` (**BG**) - Block Group
+- `"metro_micro_area"` (**CBSA**) - Metropolitan Statistical Area / Micropolitan Statistical Area
+- `"congressional_district"` (**CD**) - Congressional District
+- `"combined_necta"` (**CNECTA**) - Combined New England City and Town Area
+- `"coastline"` (**COASTLINE**) - Coastline
+- `"consolidated_city"` (**CONCITY**) - Consolidated City
+- `"county"` (**COUNTY**) - County
+- `"county_subdivision"` (**COUSUB**) - County Subdivision
+- `"combined_stat_area"` (**CSA**) - Combined Statistical Area
+- `"all_lines"` (**EDGES**) - All Lines
+- `"elementary_school_district"` (**ELSD**) - Elementary School District
+- `"estate"` (**ESTATE**) - Estate
+- `"topo_faces"` (**FACES**) - Topological Faces (Polygons with All Geocodes)
+- `"faces_area_hydro"` (**FACESAH**) - Topological Faces-Area Hydrography Relationship File
+- `"faces_area_landmark"` (**FACESAL**) - Topological Faces-Area Landmark Relationship File
+- `"faces_military"` (**FACESMIL**) - Topological Faces-Military Installation Relationship File
+- `"feature_names_rel"` (**FEATNAMES**) - Feature Names Relationship File
+- `"linear_hydrography"` (**LINEARWATER**) - Linear Hydrography
+- `"metro_division"` (**METDIV**) - Metropolitan Division
+- `"military_installation"` (**MIL**) - Military Installation
+- `"necta"` (**NECTA**) - New England City and Town Area
+- `"necta_division"` (**NECTADIV**) - New England City and Town Area Division
+- `"place"` (**PLACE**) - Place
+- `"point_landmark"` (**POINTLM**) - Point Landmark
+- `"primary_roads"` (**PRIMARYROADS**) - Primary Roads
+- `"primary_secondary_roads"` (**PRISECROADS**) - Primary and Secondary Roads
+- `"puma"` (**PUMA**) - Public Use Microdata Area
+- `"rails"` (**RAILS**) - Rails
+- `"all_roads"` (**ROADS**) - All Roads
+- `"secondary_school_district"` (**SCSD**) - Secondary School Districts
+- `"state_leg_district_lower"` (**SLDL**) - State Legislative District – Lower Chamber
+- `"state_leg_district_upper"` (**SLDU**) - State Legislative District – Upper Chamber
+- `"state"` (**STATE**) - State and Equivalent
+- `"subbarrio"` (**SUBBARRIO**) - SubMinor Civil Division (Subbarrios in Puerto Rico)
+- `"tab_block"` (**TABBLOCK**) - Tabulation (Census) Block
+- `"tribal_block_group"` (**TBG**) - Tribal Block Group
+- `"census_tract"` (**TRACT**) - Census Tract
+- `"tribal_census_tract"` (**TTRACT**) - Tribal Census Tract
+- `"urban_area_cluster"` (**UAC**) - Urban Area/Urban Cluster
+- `"unified_school_district"` (**UNSD**) - Unified School District
+- `"zip_code_area"` (**ZCTA5**) - 5-Digit ZIP Code Tabulation Area
+
+## Example
+
+```julia-repl
+julia> TIGER_DICT["county"]
+"COUNTY"
+```
+"""
+const TIGER_DICT = Dict(
+    "address_range_rel" => "ADDR",
+    "address_range_feat" => "ADDRFEAT",
+    "address_range_name_rel" => "ADDRFN",
+    "native_areas" => "AIANNH",
+    "tribal_subdivision_nat" => "AITSN",
+    "alaska_native_region" => "ANRC",
+    "area_landmark" => "AREALM",
+    "area_water" => "AREAWATER",
+    "block_group" => "BG",
+    "metro_micro_area" => "CBSA",
+    "congressional_district" => "CD",
+    "combined_necta" => "CNECTA",
+    "coastline" => "COASTLINE",
+    "consolidated_city" => "CONCITY",
+    "county" => "COUNTY",
+    "county_subdivision" => "COUSUB",
+    "combined_stat_area" => "CSA",
+    "all_lines" => "EDGES",
+    "elementary_school_district" => "ELSD",
+    "estate" => "ESTATE",
+    "topo_faces" => "FACES",
+    "faces_area_hydro" => "FACESAH",
+    "faces_area_landmark" => "FACESAL",
+    "faces_military" => "FACESMIL",
+    "feature_names_rel" => "FEATNAMES",
+    "linear_hydrography" => "LINEARWATER",
+    "metro_division" => "METDIV",
+    "military_installation" => "MIL",
+    "necta" => "NECTA",
+    "necta_division" => "NECTADIV",
+    "place" => "PLACE",
+    "point_landmark" => "POINTLM",
+    "primary_roads" => "PRIMARYROADS",
+    "primary_secondary_roads" => "PRISECROADS",
+    "puma" => "PUMA",
+    "rails" => "RAILS",
+    "all_roads" => "ROADS",
+    "secondary_school_district" => "SCSD",
+    "state_leg_district_lower" => "SLDL",
+    "state_leg_district_upper" => "SLDU",
+    "state" => "STATE",
+    "subbarrio" => "SUBBARRIO",
+    "tab_block" => "TABBLOCK",
+    "tribal_block_group" => "TBG",
+    "census_tract" => "TRACT",
+    "tribal_census_tract" => "TTRACT",
+    "urban_area_cluster" => "UAC",
+    "unified_school_district" => "UNSD",
+    "zip_code_area" => "ZCTA5"
+)
+
+export TIGER_DICT
diff --git a/src/downloads.jl b/src/downloads.jl
new file mode 100644
index 0000000..9aa0d3b
--- /dev/null
+++ b/src/downloads.jl
@@ -0,0 +1,62 @@
+"""
+    download_tiger(output_dir; year = 2020, layer = "state")
+
+Download every file listed by the US Census Bureau for one TIGER/Line `year` and
+geographic `layer`, saving each file into `output_dir`.
+
+The file list is read from the first table of the directory page for that year and layer.
+
+## Arguments
+- `output_dir::String`: The directory where downloaded files will be saved. It is created
+  if it does not already exist.
+
+## Keyword Arguments
+- `year::Int=2020` (optional): The year of the TIGER/Line data to retrieve (e.g., 2020).
+- `layer::String="state"` (optional): The geographic layer of the data; must be a key of
+  `TIGER_DICT`.
+
+## Returns
+- `nothing`.
+
+## Throws
+- `KeyError` if `layer` is not a key of `TIGER_DICT`.
+
+## Example
+```julia-repl
+julia> ?TIGER_DICT
+
+  • "county" (COUNTY) - County
+  • "state" (STATE) - State and Equivalent
+
+julia> download_tiger("./data", year=2020, layer="county")
+```
+
+This will download county-level TIGER/Line data for 2020 and store the files in `./data`.
+"""
+function download_tiger(output_dir; year = 2020, layer = "state")
+    # Look the layer up once; an unknown layer raises `KeyError` before any network access.
+    code = TIGER_DICT[layer]
+    url = sprint(printfmt, BASE_TIGER_URL, year, code)
+
+    html = read_html(url)
+    tables = html_elements(html, ["body", "table"])
+    data = html_table(tables[1])
+    # Skip the first row of the listing; presumably the "Parent Directory" link (TODO confirm).
+    files = data.Name[2:end]
+
+    # `download` does not create missing directories, so make sure the target exists.
+    mkpath(output_dir)
+
+    for f in files
+        @info "Downloading $f for layer, \"$code\", and year, $year."
+        # URLs always use "/"; `joinpath` uses the OS path separator (a backslash on Windows).
+        # `url` already ends in "/", so plain concatenation is enough for the remote side.
+        download(string(url, f), joinpath(output_dir, f))
+    end
+
+    @info "Requested \"$code\" data for $year has been downloaded! 🎉"
+
+    return nothing
+end
+
+export download_tiger