::p_load(sf, spdep, GWmodel, SpatialML, tidyverse, tmap, ggpubr, olsrr, devtools, rsample) pacman
In-Class Exercise 9: Geospatial Predictive Modelling
Import packages
Import Data
Aspatial
<- read_rds("data/aspatial/mdata.rds") mdata
Data Preprocessing
Train-test Split
set.seed(1234)
# resale_split <- initial_split(mdata,
# prop=6.5/10)
# train_data <- training(resale_split)
# test_data <- testing(resale_split)
# write_rds(train_data, "data/model/train_data.rds")
# write_rds(test_data, "data/model/test_data.rds")
<- read_rds("data/model/train_data.rds")
train_data <- read_rds("data/model/test_data.rds") test_data
Model Training
Ordinary Least Squares Method
Without geospatial weights
# price_mlr <- lm(resale_price ~ floor_area_sqm +
# storey_order + remaining_lease_mths +
# PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER +
# PROX_MRT + PROX_PARK + PROX_MALL +
# PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN +
# WITHIN_350M_CHILDCARE + WITHIN_350M_BUS +
# WITHIN_1KM_PRISCH,
# data=train_data)
# summary(price_mlr)
{# {r} # write_rds(price_mlr, "data/model/price_mlr.rds")
<- read_rds("data/model/price_mlr.rds") price_mlr
GWR Predictive Method using SpatialML
To deal with point data, we need to use sp’s spatial point data frame for the GWR to understand it.
<- as_Spatial(train_data)
train_data_sp train_data_sp
class : SpatialPointsDataFrame
features : 10335
extent : 11597.31, 42623.63, 28217.39, 48741.06 (xmin, xmax, ymin, ymax)
crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
variables : 17
names : resale_price, floor_area_sqm, storey_order, remaining_lease_mths, PROX_CBD, PROX_ELDERLYCARE, PROX_HAWKER, PROX_MRT, PROX_PARK, PROX_GOOD_PRISCH, PROX_MALL, PROX_CHAS, PROX_SUPERMARKET, WITHIN_350M_KINDERGARTEN, WITHIN_350M_CHILDCARE, ...
min values : 218000, 74, 1, 555, 0.999393538715878, 1.98943787433087e-08, 0.0333358643817954, 0.0220407324774434, 0.0441643212802781, 0.0652540365486641, 0, 6.20621206270077e-09, 1.21715176356525e-07, 0, 0, ...
max values : 1186888, 133, 17, 1164, 19.6500691667807, 3.30163731686804, 2.86763031236184, 2.13060636038504, 2.41313695915468, 10.6223726149914, 2.27100643784442, 0.808332738794272, 1.57131703651196, 7, 20, ...
Random Forest
Extract coordinate data as a vector table
<- st_coordinates(mdata)
coords <- st_coordinates(train_data)
coords_train <- st_coordinates(test_data) coords_test
write_rds(coords_train, "data/model/coords_train.rds")
write_rds(coords_test, "data/model/coords_test.rds")
Drop geometry
<- st_drop_geometry(train_data) train_data
Calibrate random forest (without spatial weights)
set.seed(1234)
<- ranger(resale_price ~ floor_area_sqm +
rf + remaining_lease_mths +
storey_order + PROX_ELDERLYCARE + PROX_HAWKER +
PROX_CBD + PROX_PARK + PROX_MALL +
PROX_MRT + WITHIN_350M_KINDERGARTEN +
PROX_SUPERMARKET + WITHIN_350M_BUS +
WITHIN_350M_CHILDCARE
WITHIN_1KM_PRISCH,data=train_data)
print(rf)
Ranger result
Call:
ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths + PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_350M_BUS + WITHIN_1KM_PRISCH, data = train_data)
Type: Regression
Number of trees: 500
Sample size: 10335
Number of independent variables: 14
Mtry: 3
Target node size: 5
Variable importance mode: none
Splitrule: variance
OOB prediction error (MSE): 728602496
R squared (OOB): 0.9495728
It has a higher R2 value than the OLS.
GWR Random Forest (Adaptive)
Sorry, not enough computer memory to run this :(
set.seed(1234)
# gwRF_adaptive <- grf(formula = resale_price ~ floor_area_sqm +
# storey_order + remaining_lease_mths +
# PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER +
# PROX_MRT + PROX_PARK + PROX_MALL +
# PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN +
# WITHIN_350M_CHILDCARE + WITHIN_350M_BUS +
# WITHIN_1KM_PRISCH,
# dframe=train_data,
# bw=55, # defined as distance
# kernel="adaptive",
# coords=coords_train)
# write_rds(gwRF_adaptive, "data/model/gwRF_adaptive.rds")
# gwRF_adaptive <- read_rds("data/model/gwRF_adaptive.rds")
# vi_df <- as.data.frame(gwRF_adaptive$Global.Model$variable.importance)
Model Evaluation
Preparing test data
<- cbind(test_data, coords_test) %>%
test_data st_drop_geometry()
Predict the data
# gwRF_pred <- predict.grf(gwRF_adaptive,
# test_data,
# x.var.name="X",
# y.var.name="Y",
# local.w=1,
# global.w=0)
# gwRF_pred_df <- as.data.frame(gwRF_pred)
Model Evaluation
Get the RMSE between prediction values and ground truth
# sqrt(mean((test_predict$resale_price - test_predict$predict_grf)^2))