-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNYCTaxiTripsTableDef-TM.sql
30 lines (30 loc) · 1004 Bytes
/
NYCTaxiTripsTableDef-TM.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- Defines the external table `nyctaxitrips` over Parquet files of NYC taxi
-- trip records stored under the S3 prefix given in LOCATION.
-- Because the table is EXTERNAL, dropping it removes only the metastore
-- entry; the underlying files are left in place.
--
-- Replace <BUCKET-NAME> in LOCATION with the real bucket before running.
CREATE EXTERNAL TABLE `nyctaxitrips`
(
    -- Data columns read from the Parquet files.
    -- NOTE: `vendorId` is intentionally NOT listed here. It is declared as a
    -- partition key below, and Hive/Athena reject a CREATE TABLE in which a
    -- partition column repeats a data column ("Column repeated in
    -- partitioning columns"). It is still queryable as a normal column.
    `id`               string,
    `pickupDate`       string,  -- kept as string; NOTE(review): presumably a timestamp literal - confirm format
    `dropoffDate`      string,  -- kept as string; NOTE(review): presumably a timestamp literal - confirm format
    `passengerCount`   int,
    `pickupLongitude`  double,
    `pickupLatitude`   double,
    `dropoffLongitude` double,
    `dropoffLatitude`  double,
    `storeAndFwdFlag`  string,
    `gcDistance`       double,
    `tripDuration`     int,
    `googleDistance`   int,
    `googleDuration`   int,
    `source`           string
)
PARTITIONED BY (
    -- Partition keys: their values come from the partition specification
    -- (directory layout / registered partitions), not from the data files.
    `year`     string,
    `month`    string,
    `day`      string,
    `hour`     string,
    `vendorId` int  -- partition key for more granularity; sole declaration of vendorId
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 's3://<BUCKET-NAME>/nyctaxitrips/';