2017 Yearly Analysis
The data for 2015 serves as the benchmark for our analysis, since 2015 was the company's first year in business.
Preparing Data for Analysis:
library("readxl")
library("ggplot2")
library("dygraphs")
# Load the four 2017 quarterly Divvy trip workbooks and stack them into a
# single table named `Trips`.
# NOTE(review): the working directory is machine-specific; it is kept here
# because the workbook paths below are resolved against getwd().
setwd("D:/Case Study")

# Absolute paths of the Q1-Q4 workbooks, built in one vectorised call.
paths2017 <- paste0(
  getwd(),
  "/Yearly_Data/2017/Quarterly/Divvy_Trips_2017_Q",
  1:4,
  ".xlsx"
)

# Collection of every path to read (only the 2017 paths in this script).
AllPaths <- c(paths2017)

# Read each workbook once and bind all of them in a single rbind() call
# rather than growing `Trips` inside a loop, which re-copies the accumulated
# rows on every iteration. The first eleven column types are guessed by
# readxl; the twelfth column is forced to numeric.
Trips <- do.call(
  rbind,
  lapply(
    unlist(AllPaths),
    read_excel,
    col_types = c(rep("guess", 11L), "numeric")
  )
)
Summary of Data:
# Print per-column summary statistics of the combined trip table.
summary(Trips)
## trip_id start_time end_time bikeid
## Min. :12979230 Length:3198080 Length:3198080 Min. : 1
## 1st Qu.:14052951 Class :character Class :character 1st Qu.:1817
## Median :15643448 Mode :character Mode :character Median :3605
## Mean :15326739 Mean :3451
## 3rd Qu.:16580314 3rd Qu.:5103
## Max. :17536701 Max. :6471
##
## tripduration from_station_id from_station_name to_station_id
## Min. : 60.0 Min. : 2.0 Length:3198080 Min. : 2.0
## 1st Qu.: 392.0 1st Qu.: 74.0 Class :character 1st Qu.: 74.0
## Median : 664.0 Median :156.0 Mode :character Median :156.0
## Mean : 926.2 Mean :180.6 Mean :180.7
## 3rd Qu.: 1113.0 3rd Qu.:268.0 3rd Qu.:268.0
## Max. :86338.0 Max. :626.0 Max. :626.0
##
## to_station_name usertype gender birthyear
## Length:3198080 Length:3198080 Length:3198080 Min. :1899
## Class :character Class :character Class :character 1st Qu.:1976
## Mode :character Mode :character Mode :character Median :1985
## Mean :1982
## 3rd Qu.:1990
## Max. :2017
## NA's :639854
Analysis Metrics:
- Customer-Sub Ratio
- Gender Demographic
- Trip Duration
Customer-Sub Ratio:
# Customer/subscriber split: count the trips whose usertype is "Subscriber"
# versus every other usertype, then show the two shares as a pie chart.
UserTypeCol <- Trips$usertype

# Vectorised comparison replaces the element-by-element loop; missing
# usertype values are skipped instead of aborting the if() test.
is_subscriber <- UserTypeCol == "Subscriber"
Subs <- sum(is_subscriber, na.rm = TRUE)
# Any non-missing usertype other than "Subscriber" counts as a customer.
Customers <- sum(!is_subscriber, na.rm = TRUE)

# Slice labels carry each group's percentage of all counted trips,
# rounded to two decimals.
total_trips <- Subs + Customers
pie(
  c(Subs, Customers),
  labels = c(
    paste("Subscribers = ", round(Subs * 100 / total_trips, 2), "%"),
    paste("Customers = ", round(Customers * 100 / total_trips, 2), "%")
  )
)
Gender Demographic:
# Bar chart of trip counts per gender, with each bar filled by user type.
ggplot(Trips, aes(x = gender, fill = usertype)) +
  geom_bar()
Trip Duration:
# Smoothed trend of trip duration plotted against trip id.
ggplot(Trips) +
  geom_smooth(mapping = aes(x = trip_id, y = tripduration))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'