Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
name = "TigerLine"
uuid = "9a9a8258-a423-4c9c-ac3d-7cc63de3c137"
authors = ["Anshul Singhvi <[email protected]>", "Jacob Zelko <[email protected]>", "and contributors"]
version = "0.1.0-DEV"
version = "0.1.0"

[deps]
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
Shapefile = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4"
TidierVest = "969b988e-7aed-4820-b60d-bdec252047c4"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

[compat]
julia = "1.6"
Aqua = "0.8"
Downloads = "1"
Scratch = "1"
Shapefile = "0.13"
Test = "1"
TidierVest = "0.4"
ZipFile = "0.9, 0.10"
julia = "1.10"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,18 @@
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaGeo.github.io/TigerLine.jl/dev/)
[![Build Status](https://github.com/JuliaGeo/TigerLine.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/JuliaGeo/TigerLine.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![Aqua](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)

## Quick start

```julia-repl
julia> using TigerLine

julia> TigerLine.get(2020, "county")
Shapefile.Table{Union{Missing, Shapefile.Polygon}} with 3234 rows and the following 18 columns:

geometry, STATEFP, COUNTYFP, COUNTYNS, GEOID, NAME, NAMELSAD, LSAD, CLASSFP, MTFCC, CSAFP, CBSAFP, METDIVFP, FUNCSTAT, ALAND, AWATER, INTPTLAT, INTPTLON
```

This is a [Shapefile.jl](https://github.com/JuliaGeo/Shapefile.jl) `Table`, which is a [GeoInterface.jl](https://github.com/JuliaGeo/GeoInterface.jl)-compatible feature collection, and also a [Tables.jl](https://github.com/JuliaData/Tables.jl) table.

You can convert the result to a [DataFrame](https://github.com/JuliaData/DataFrames.jl) with, for example, `TigerLine.get(2020, "county") |> DataFrame`.
24 changes: 23 additions & 1 deletion src/TigerLine.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
module TigerLine

# Bring `download` into scope; `downloads.jl` and `get.jl` call it to fetch TIGER/Line files.
using Downloads:
download
# `@get_scratch!` supplies the directory that `__init__` stores in `download_cache`.
using Scratch: @get_scratch!

import TidierVest # to enable parsing TIGER html tables

import Shapefile, ZipFile # to enable reading

# Directory in which `TigerLine.get` caches downloaded files. It is an empty string
# until `__init__` assigns the scratch-space path.
download_cache::String = ""

# Module initializer: point `download_cache` at the "tigerline_cache" scratch space.
function __init__()
global download_cache = @get_scratch!("tigerline_cache")
end

# The order matters: `downloads.jl` and `get.jl` use `TIGER_DICT` and `base_tiger_url`
# from `constants.jl`, and `get.jl` uses `list_tiger_files` from `downloads.jl`.
include("constants.jl")
include("downloads.jl")
include("get.jl")

export download_tiger, list_tiger_files, base_tiger_url
# Note that `get` is not exported here. The main way to use it is `TigerLine.get(year, layer)`.
# `public.jl` (which declares `public get`) is only included on Julia 1.11 and later.
@static if VERSION >= v"1.11"
include("public.jl")
end

end
138 changes: 138 additions & 0 deletions src/constants.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@

const CENSUS_URL = Ref{String}("https://www2.census.gov/")

"""
    base_tiger_url(year, layer)

Build the directory URL under which the TIGER/Line files for `year` and `layer` are
served.

`layer` is inserted into the URL as-is, so it must be a **value** of
[`TIGER_DICT`](@ref) (for example `"COUNTY"`), not one of its keys.

The returned URL always ends in `/`, so a file name can be appended to it directly.

```jldoctest
julia> base_tiger_url(1234, "somelayer")
"https://www2.census.gov/geo/tiger/TIGER1234/somelayer/"
```
"""
function base_tiger_url(year, layer)
    # The host prefix lives in a `Ref`, so it is read at call time.
    return string(CENSUS_URL[], "geo/tiger/TIGER", year, "/", layer, "/")
end

# "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_5m.zip"

"""
A dictionary mapping human-readable keys to TIGER/Line dataset codes and their associated descriptions.

> **Source:** `https://www2.census.gov/geo/tiger/TIGER2017/2017_TL_Shapefiles_File_Name_Definitions.pdf`

## Keys

- `"address_range_rel"` (**ADDR**) - Address Range Relationship File
- `"address_range_feat"` (**ADDRFEAT**) - Address Range Feature
- `"address_range_feat_name"` (**ADDRFN**) - Address Range-Feature Name Relationship
- `"native_areas"` (**AIANNH**) - American Indian / Alaska Native / Native Hawaiian Areas
- `"native_subdivision"` (**AITSN**) - American Indian Tribal Subdivision National
- `"alaska_native_corp"` (**ANRC**) - Alaska Native Regional Corporation
- `"area_landmark"` (**AREALM**) - Area Landmark
- `"area_water"` (**AREAWATER**) - Area Hydrography
- `"block_group"` (**BG**) - Block Group
- `"metro_micro_area"` (**CBSA**) - Metropolitan Statistical Area / Micropolitan Statistical Area
- `"congressional_district"` (**CD**) - Congressional District
- `"combined_new_england_city_town"` (**CNECTA**) - Combined New England City and Town Area
- `"coastline"` (**COASTLINE**) - Coastline
- `"consolidated_city"` (**CONCITY**) - Consolidated City
- `"county"` (**COUNTY**) - County
- `"county_subdivision"` (**COUSUB**) - County Subdivision
- `"combined_statistical_area"` (**CSA**) - Combined Statistical Area
- `"all_lines"` (**EDGES**) - All Lines
- `"elementary_school_district"` (**ELSD**) - Elementary School District
- `"estate"` (**ESTATE**) - Estate
- `"topo_faces"` (**FACES**) - Topological Faces (Polygons with All Geocodes)
- `"topo_faces_area_hydro"` (**FACESAH**) - Topological Faces-Area Hydrography Relationship File
- `"topo_faces_area_landmark"` (**FACESAL**) - Topological Faces-Area Landmark Relationship File
- `"topo_faces_military"` (**FACESMIL**) - Topological Faces-Military Installation Relationship File
- `"feature_names"` (**FEATNAMES**) - Feature Names Relationship File
- `"linear_hydro"` (**LINEARWATER**) - Linear Hydrography
- `"metro_division"` (**METDIV**) - Metropolitan Division
- `"military_installation"` (**MIL**) - Military Installation
- `"new_england_city_town"` (**NECTA**) - New England City and Town Area
- `"new_england_city_town_div"` (**NECTADIV**) - New England City and Town Area Division
- `"place"` (**PLACE**) - Place
- `"point_landmark"` (**POINTLM**) - Point Landmark
- `"primary_roads"` (**PRIMARYROADS**) - Primary Roads
- `"primary_secondary_roads"` (**PRISECROADS**) - Primary and Secondary Roads
- `"public_microdata_area"` (**PUMA**) - Public Use Microdata Area
- `"rails"` (**RAILS**) - Rails
- `"all_roads"` (**ROADS**) - All Roads
- `"secondary_school_district"` (**SCSD**) - Secondary School Districts
- `"state_legislative_lower"` (**SLDL**) - State Legislative District – Lower Chamber
- `"state_legislative_upper"` (**SLDU**) - State Legislative District – Upper Chamber
- `"state"` (**STATE**) - State and Equivalent
- `"subbarrio"` (**SUBBARRIO**) - SubMinor Civil Division (Subbarios in Puerto Rico)
- `"tabulation_block"` (**TABBLOCK**) - Tabulation (Census) Block
- `"tribal_block_group"` (**TBG**) - Tribal Block Group
- `"census_tract"` (**TRACT**) - Census Tract
- `"tribal_census_tract"` (**TTRACT**) - Tribal Census Tract
- `"urban_area_cluster"` (**UAC**) - Urban Area/Urban Cluster
- `"unified_school_district"` (**UNSD**) - Unified School District
- `"zip_code_area"` (**ZCTA5**) - 5-Digit ZIP Code Tabulation Area

## Example

```julia-repl
julia> TIGER_DICT["county"]
"COUNTY"
```
"""
const TIGER_DICT = Dict(
"address_range_rel" => "ADDR",
"address_range_feat" => "ADDRFEAT",
"address_range_name_rel" => "ADDRFN",
"native_areas" => "AIANNH",
"tribal_subdivision_nat" => "AITSN",
"alaska_native_region" => "ANRC",
"area_landmark" => "AREALM",
"area_water" => "AREAWATER",
"block_group" => "BG",
"metro_micro_area" => "CBSA",
"congressional_district" => "CD",
"combined_necta" => "CNECTA",
"coastline" => "COASTLINE",
"consolidated_city" => "CONCITY",
"county" => "COUNTY",
"county_subdivision" => "COUSUB",
"combined_stat_area" => "CSA",
"all_lines" => "EDGES",
"elementary_school_district" => "ELSD",
"estate" => "ESTATE",
"topo_faces" => "FACES",
"faces_area_hydro" => "FACESAH",
"faces_area_landmark" => "FACESAL",
"faces_military" => "FACESMIL",
"feature_names_rel" => "FEATNAMES",
"linear_hydrography" => "LINEARWATER",
"metro_division" => "METDIV",
"military_installation" => "MIL",
"necta" => "NECTA",
"necta_division" => "NECTADIV",
"place" => "PLACE",
"point_landmark" => "POINTLM",
"primary_roads" => "PRIMARYROADS",
"primary_secondary_roads" => "PRISECROADS",
"puma" => "PUMA",
"rails" => "RAILS",
"all_roads" => "ROADS",
"secondary_school_district" => "SCSD",
"state_leg_district_lower" => "SLDL",
"state_leg_district_upper" => "SLDU",
"state" => "STATE",
"subbarrio" => "SUBBARRIO",
"tab_block" => "TABBLOCK",
"tribal_block_group" => "TBG",
"census_tract" => "TRACT",
"tribal_census_tract" => "TTRACT",
"urban_area_cluster" => "UAC",
"unified_school_district" => "UNSD",
"zip_code_area" => "ZCTA5"
)

export TIGER_DICT
94 changes: 94 additions & 0 deletions src/downloads.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
download_tiger(output_dir; year, layer)

Downloads TIGER/Line geographic data from the US Census Bureau for the specified year and geographic layer,
saving the data as shapefiles.

Valid arguments for `layer` are the keys of [`TIGER_DICT`](@ref). Valid arguments for `year` are those years
for which TIGER/Line data is available.

## Arguments
- `output_dir::String`: The directory where downloaded files will be saved.

## Keyword Arguments
- `year::Int=2020` (optional): The year of the TIGER/Line data to retrieve (e.g., 2020).
- `layer::String="state"` (optional): The geographic layer of the data; must be a **key** of [`TIGER_DICT`](@ref).

## Returns
Returns a vector of (absolute) file paths to the downloaded files.

## Example
```jldoctest
julia> ?TIGER_DICT

• "county" (COUNTY) - County
• "state" (STATE) - State and Equivalent

julia> download_tiger("./data", year=2020, layer="county")
1-element Vector{String}:
"./data/tl_2020_us_county.zip"
```

This will download county-level TIGER/Line data for 2020 and store the shapefiles in `./data`.
"""
function download_tiger(output_dir; year = 2020, layer = "state")

url = base_tiger_url(year, TIGER_DICT[layer])

files = list_tiger_files(year, layer)

for f in files
@debug "TigerLine.jl: Downloading $f" _layer=TIGER_DICT[layer] year
download(
url * f, # URL from `base_tiger_url` is guaranteed to end in `/`
joinpath(output_dir, f) # joinpath is correct here since this is a local file path
)
end

@debug "TigerLine.jl: Requested \"$(TIGER_DICT[layer])\" data for $year has been downloaded! 🎉";

return joinpath.((output_dir,), files) # return a vector of file paths that have been downloaded to

end

"""
list_tiger_files(year, layer)

Returns a vector of file names for the specified year and layer.

This will return only _file names_, not the full paths to the layer.
You can get the full path by using [`base_tiger_url`](@ref) and joining the result with the file name,
like so:
```julia
files = list_tiger_files(2020, "county")
full_paths = TigerLine.base_tiger_url(2020, "county") .* files
```

## Arguments
- `year::Int`: The year of the TIGER/Line data to retrieve (e.g., 2020).
- `layer::String`: The geographic layer of the data; must be a **key** of [`TIGER_DICT`](@ref).

## Example

```jldoctest
julia> list_tiger_files(2020, "county")
1-element Vector{String}:
"tl_2020_us_county.zip"
```

This will return a vector of file names for the specified year and layer.
"""
function list_tiger_files(year, layer)

url = base_tiger_url(year, TIGER_DICT[layer])

html = TidierVest.read_html(url)
tables = TidierVest.html_elements(html, ["body", "table"])

data = tables[1] |> TidierVest.html_table # TODO: throw an informative error if this fails

files = data.Name[2:end] # TODO: throw an informative error if this fails

return files

end
59 changes: 59 additions & 0 deletions src/get.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
TigerLine.get(year, layer)

Downloads the TIGER/Line file for the specified year and layer, and returns a Shapefile.Table.

## Arguments
- `year::Int`: The year of the TIGER/Line data to retrieve (e.g., 2020).
- `layer::String`: The geographic layer of the data; look at `TIGER_DICT` for more options.

## Returns
Returns a `Shapefile.Table`, which is a [Tables.jl](https://github.com/JuliaData/Tables.jl) table
and a [GeoInterface.jl](https://github.com/JuliaGeo/GeoInterface.jl) feature collection.

All geometries are in `table.geometry`, and all other attributes are the other table columns.
The table can be accessed by `getproperty` syntax, e.g. `table.NAME`, which yields that column as a
Julia vector.

## Example
```julia-repl
julia> using TigerLine

julia> TigerLine.get(2020, "county")
Shapefile.Table{Union{Missing, Shapefile.Polygon}} with 3234 rows and the following 18 columns:

geometry, STATEFP, COUNTYFP, COUNTYNS, GEOID, NAME, NAMELSAD, LSAD, CLASSFP, MTFCC, CSAFP, CBSAFP, METDIVFP, FUNCSTAT, ALAND, AWATER, INTPTLAT, INTPTLON
```
"""
function get(year, layer)

global download_cache # bring in the global download cache that is set in Scratch.jl

# Get the URL and files for the requested layer and year
url = base_tiger_url(year, TIGER_DICT[layer])
files = list_tiger_files(year, layer)

# Check that there is only one file for the requested layer and year, if not then error
if length(files) == 0
throw(ArgumentError("No files found for layer, \"$(TIGER_DICT[layer])\", and year, $year."))
elseif length(files) > 1
throw(ArgumentError("Multiple files found for layer, \"$(TIGER_DICT[layer])\", and year, $year. Please use `download_tiger` to download these files explicitly."))
end # if length(files) == 1, then continue

# Get the file path to the file
file_path = joinpath(download_cache, only(files))

# Download the file if it doesn't exist / has not been cached
if !isfile(file_path)
@debug "TigerLine.jl: Did not find $file_path, downloading from $url"
download(url * only(files), file_path)
end

# Read the file via Shapefile.jl and return the table so read.
# TODO: should we make this a DataFrame instead for maximum manipulability?
# We can add geo metadata to the dataframe easily following the GeoDataFrames approach.
return Shapefile.Table(file_path)
end

# add a keyword argument version of the function
get(; year = 2020, layer = "county") = get(year, layer)
1 change: 1 addition & 0 deletions src/public.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Mark `get` as part of the public API without exporting it, so that it is called as
# `TigerLine.get(year, layer)`. This file is only included by `src/TigerLine.jl` when
# `VERSION >= v"1.11"`.
public get