-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlibrary.R
103 lines (91 loc) · 3.37 KB
/
library.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
suppressMessages({
library(data.table)
library(dplyr)
library(pbapply)
library(XML)
})
# Read the GPS track points of an export stored in directory `d`.
#
# Looks for <d>/gpx/full/activities.gpx; when it is missing and <d>/gpx.zip
# exists, the archive is unpacked into `d` first.
#
# Args:
#   d: path of the export directory.
#
# Returns:
#   A tbl_dt keyed on `segment` with the columns segment, time, activity,
#   latitude, longitude and date. `segment` is "<day>-<NNN>", where NNN is the
#   running number of the segment within its day.
#
# Raises:
#   Stops with "File not found: <path>" when the GPX file is still missing
#   after the unzip attempt.
get_tracks <- function(d) {
  tracksfile <- file.path(d, "gpx", "full", "activities.gpx")
  if (!file.exists(tracksfile)) {
    zipfile <- file.path(d, "gpx.zip")
    if (file.exists(zipfile)) {
      unzip(zipfile, exdir = d)
    }
  }
  if (!file.exists(tracksfile)) {
    stop("File not found: ", tracksfile)
  }

  activitylist <- xmlToList(xmlTreeParse(tracksfile))
  # remove first and last element as they contain only metadata
  activitylist[[1]] <- NULL
  activitylist[[length(activitylist)]] <- NULL

  # We now have a list of lists: each element of activitylist holds the
  # segments recorded on one day, preceded by the name (date) of that day.

  # extractsegments() turns one segment into a data.table of track points.
  extractsegments <- function(segmentlist) {
    # segmentlist holds 3 consecutive elements per track point:
    # 1: time stamp
    # 2: activity type
    # 3: coordinates
    n <- length(segmentlist)
    daytracks <- as.data.table(t(as.data.frame(
      segmentlist[seq(from = 3, to = n, by = 3)])))
    # NOTE(review): assumes each coordinate pair is ordered (longitude,
    # latitude) -- confirm against the GPX export.
    setnames(daytracks, c("longitude", "latitude"))
    daytracks$longitude <- as.numeric(daytracks$longitude)
    daytracks$latitude <- as.numeric(daytracks$latitude)
    daytracks$time <- unlist(segmentlist[seq(from = 1, to = n, by = 3)])
    daytracks$activity <- unlist(segmentlist[seq(from = 2, to = n, by = 3)])
    daytracks
  }

  # extractdays() processes all segments of one day and labels every track
  # point with the id of its segment.
  extractdays <- function(daylist) {
    name <- as.Date(daylist[[1]], format = "%m/%d/%y")
    # Drop the leading name element. as.list() also covers a day that was
    # simplified to a plain vector because it carries nothing but its name.
    segments <- as.list(daylist)[-1L]
    if (length(segments) == 0L) {
      # A day without segments contributes no rows; rbindlist() skips NULL.
      return(NULL)
    }
    tracks <- rbindlist(lapply(segments, extractsegments))
    # Every segment stores 3 elements per track point (see extractsegments).
    points_per_segment <- vapply(segments, length, integer(1)) / 3
    tracks$segment <- paste0(
      name, "-",
      sprintf("%03i", rep(seq_along(segments), points_per_segment))
    )
    setcolorder(tracks, c("segment", "time", "activity", "latitude",
                          "longitude"))
    tracks
  }

  message("Processing tracks data...")
  # NOTE(review): tbl_dt() appears to have been moved out of dplyr in later
  # releases -- confirm the installed dplyr still provides it.
  tracks <- tbl_dt(rbindlist(pblapply(activitylist, extractdays)))
  tracks$date <- as.IDate(tracks$time, format = "%Y-%m-%dT%H:%M:%S")
  setkey(tracks, segment)
  tracks
}
# Load the activities table of the export stored in directory `d`.
#
# Reads <d>/csv/full/activities.csv; when that file is missing and
# <d>/csv.zip exists, the archive is unpacked into `d` first. Column names are
# lower-cased and the `start` / `end` stamps are parsed into the additional
# columns `startdate` and `enddate`.
#
# Args:
#   d: path of the export directory.
#
# Returns:
#   A tbl_df holding the CSV contents plus `startdate` and `enddate`.
#
# Raises:
#   Stops with "File not found: <path>" when the CSV is still missing after
#   the unzip attempt.
get_activities <- function(d) {
  csv_path <- file.path(d, "csv", "full", "activities.csv")
  if (!file.exists(csv_path)) {
    archive <- file.path(d, "csv.zip")
    if (file.exists(archive)) {
      unzip(archive, exdir = d)
    }
  }
  if (!file.exists(csv_path)) {
    stop("File not found: ", csv_path)
  }

  stamp_format <- "%Y-%m-%dT%H:%M:%S"
  activities <- tbl_df(fread(csv_path))
  setnames(activities, tolower(colnames(activities)))
  activities$startdate <- as.IDate(activities$start, format = stamp_format)
  activities$enddate <- as.IDate(activities$end, format = stamp_format)
  activities
}
# Load the places table of the export stored in directory `d`.
#
# Reads <d>/csv/full/places.csv; when that file is missing and <d>/csv.zip
# exists, the archive is unpacked into `d` first. Column names are lower-cased
# and the `start` / `end` stamps are split into the additional columns
# `startdate`, `starttime`, `enddate` and `endtime`.
#
# Args:
#   d: path of the export directory.
#
# Returns:
#   A tbl_dt holding the CSV contents plus the four date/time columns.
#
# Raises:
#   Stops with "File not found: <path>" when the CSV is still missing after
#   the unzip attempt.
get_places <- function(d) {
  csv_path <- file.path(d, "csv", "full", "places.csv")
  if (!file.exists(csv_path)) {
    archive <- file.path(d, "csv.zip")
    if (file.exists(archive)) {
      unzip(archive, exdir = d)
    }
  }
  if (!file.exists(csv_path)) {
    stop("File not found: ", csv_path)
  }

  stamp_format <- "%Y-%m-%dT%H:%M:%S"
  # NOTE(review): tbl_dt() appears to have been moved out of dplyr in later
  # releases -- confirm the installed dplyr still provides it.
  places <- tbl_dt(fread(csv_path))
  setnames(places, tolower(colnames(places)))
  places$startdate <- as.IDate(places$start, format = stamp_format)
  places$starttime <- as.ITime(places$start, format = stamp_format)
  places$enddate <- as.IDate(places$end, format = stamp_format)
  places$endtime <- as.ITime(places$end, format = stamp_format)
  places
}
# EOF