forked from toddwschneider/nyc-taxi-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_nyc_taxi_schema.sql
151 lines (139 loc) · 3.44 KB
/
create_nyc_taxi_schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
CREATE EXTENSION postgis;
CREATE TABLE green_tripdata_staging (
id serial primary key,
vendor_id text,
lpep_pickup_datetime text,
lpep_dropoff_datetime text,
store_and_fwd_flag text,
rate_code_id text,
pickup_longitude numeric,
pickup_latitude numeric,
dropoff_longitude numeric,
dropoff_latitude numeric,
passenger_count text,
trip_distance text,
fare_amount text,
extra text,
mta_tax text,
tip_amount text,
tolls_amount text,
ehail_fee text,
improvement_surcharge text,
total_amount text,
payment_type text,
trip_type text,
pickup_location_id text,
dropoff_location_id text,
junk1 text,
junk2 text
);
/*
N.B. junk columns are there because some tripdata file headers are
inconsistent with the actual data, e.g. header says 20 or 21 columns per row,
but data actually has 22 or 23 columns per row, which COPY doesn't like.
junk1 and junk2 should always be null
*/
CREATE TABLE yellow_tripdata_staging (
id serial primary key,
vendor_id text,
tpep_pickup_datetime text,
tpep_dropoff_datetime text,
passenger_count text,
trip_distance text,
pickup_longitude numeric,
pickup_latitude numeric,
rate_code_id text,
store_and_fwd_flag text,
dropoff_longitude numeric,
dropoff_latitude numeric,
payment_type text,
fare_amount text,
extra text,
mta_tax text,
tip_amount text,
tolls_amount text,
improvement_surcharge text,
total_amount text,
pickup_location_id text,
dropoff_location_id text,
junk1 text,
junk2 text
);
CREATE TABLE uber_trips_2014 (
id serial primary key,
pickup_datetime timestamp without time zone,
pickup_latitude numeric,
pickup_longitude numeric,
base_code text
);
CREATE TABLE fhv_trips_staging (
dispatching_base_num text,
pickup_datetime text,
dropoff_datetime text,
pickup_location_id text,
dropoff_location_id text,
shared_ride text
);
CREATE TABLE fhv_trips (
id serial primary key,
dispatching_base_num text,
pickup_datetime timestamp without time zone,
dropoff_datetime timestamp without time zone,
pickup_location_id integer,
dropoff_location_id integer,
shared_ride integer
);
CREATE TABLE fhv_bases (
base_number text primary key,
base_name text,
dba text,
dba_category text
);
CREATE INDEX ON fhv_bases (dba_category);
CREATE TABLE cab_types (
id serial primary key,
type text
);
INSERT INTO cab_types (type) SELECT 'yellow';
INSERT INTO cab_types (type) SELECT 'green';
CREATE TABLE trips (
id serial primary key,
cab_type_id integer,
vendor_id text,
pickup_datetime timestamp without time zone,
dropoff_datetime timestamp without time zone,
store_and_fwd_flag text,
rate_code_id integer,
pickup_longitude numeric,
pickup_latitude numeric,
dropoff_longitude numeric,
dropoff_latitude numeric,
passenger_count integer,
trip_distance numeric,
fare_amount numeric,
extra numeric,
mta_tax numeric,
tip_amount numeric,
tolls_amount numeric,
ehail_fee numeric,
improvement_surcharge numeric,
total_amount numeric,
payment_type text,
trip_type integer,
pickup_nyct2010_gid integer,
dropoff_nyct2010_gid integer,
pickup_location_id integer,
dropoff_location_id integer
);
CREATE TABLE central_park_weather_observations (
station_id text,
station_name text,
date date,
precipitation numeric,
snow_depth numeric,
snowfall numeric,
max_temperature numeric,
min_temperature numeric,
average_wind_speed numeric
);
CREATE UNIQUE INDEX ON central_park_weather_observations (date);