library(tsrecipes)
library(tidymodels)
#> ── Attaching packages ────────────────────────────────────────────────── tidymodels 0.1.1 ──
#> ✓ broom 0.7.0 ✓ recipes 0.1.13
#> ✓ dials 0.0.8 ✓ rsample 0.0.7
#> ✓ dplyr 1.0.0 ✓ tibble 3.0.3
#> ✓ ggplot2 3.3.2 ✓ tidyr 1.1.0
#> ✓ infer 0.5.3 ✓ tune 0.1.1
#> ✓ modeldata 0.0.2 ✓ workflows 0.1.3
#> ✓ parsnip 0.1.3 ✓ yardstick 0.0.7
#> ✓ purrr 0.3.4
#> ── Conflicts ───────────────────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
#> x recipes::step() masks stats::step()
lg <- logistic_reg(penalty = tune(), mixture = 1) %>%
set_engine("glmnet")
rec <- recipe(prices) %>%
update_role(everything(), new_role = "var") %>%
update_role(class, new_role = "outcome") %>%
step_dct(ts, k = tune())
pipeline <- workflow() %>%
add_model(lg) %>%
add_recipe(rec)
coef_grid <- expand_grid(
k = c(4, 8, 16, 32),
penalty = 10^seq(-4, -1, length.out = 30)
)
cv_results <- pipeline %>%
tune_grid(
resamples = vfold_cv(prices),
grid = coef_grid
)
cv_results %>%
collect_metrics() %>%
filter(.metric == "accuracy") %>%
arrange(desc(mean))
#> # A tibble: 120 x 8
#> penalty k .metric .estimator mean n std_err .config
#> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <fct>
#> 1 0.0189 32 accuracy binary 0.696 10 0.0168 Recipe4_Model23
#> 2 0.00174 8 accuracy binary 0.695 10 0.0127 Recipe2_Model13
#> 3 0.00221 8 accuracy binary 0.695 10 0.0128 Recipe2_Model14
#> 4 0.00356 8 accuracy binary 0.695 10 0.0125 Recipe2_Model16
#> 5 0.00452 8 accuracy binary 0.695 10 0.0128 Recipe2_Model17
#> 6 0.00137 8 accuracy binary 0.694 10 0.0129 Recipe2_Model12
#> 7 0.00574 8 accuracy binary 0.694 10 0.0133 Recipe2_Model18
#> 8 0.00728 8 accuracy binary 0.694 10 0.0133 Recipe2_Model19
#> 9 0.00281 8 accuracy binary 0.694 10 0.0123 Recipe2_Model15
#> 10 0.00281 16 accuracy binary 0.694 10 0.0136 Recipe3_Model15
#> # … with 110 more rows
cv_results %>%
show_best("accuracy")
#> # A tibble: 5 x 8
#> penalty k .metric .estimator mean n std_err .config
#> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <fct>
#> 1 0.0189 32 accuracy binary 0.696 10 0.0168 Recipe4_Model23
#> 2 0.00174 8 accuracy binary 0.695 10 0.0127 Recipe2_Model13
#> 3 0.00221 8 accuracy binary 0.695 10 0.0128 Recipe2_Model14
#> 4 0.00356 8 accuracy binary 0.695 10 0.0125 Recipe2_Model16
#> 5 0.00452 8 accuracy binary 0.695 10 0.0128 Recipe2_Model17
model <- pipeline %>%
finalize_workflow(cv_results %>% select_best("accuracy")) %>%
fit(data = prices)
model %>%
pull_workflow_fit() %>%
vip::vip()
step <- model$pre$mold$blueprint$recipe$steps[[1]]
prices_coef <- prices %>%
bind_cols(model$pre$mold$predictors)
price_recon <- prices_coef %>%
reconstruct("ts", step, starts_with("dct_"))
price_recon %>%
sample_n(10) %>%
unnest(c(ts, ts_recon, n)) %>%
ggplot(aes(n)) +
geom_line(aes(y = ts), color = "red") +
geom_line(aes(y = ts_recon), color = "blue") +
facet_wrap(~id)
ts.32_imp <- prices_coef %>%
summarise(across(starts_with("dct_"), mean)) %>%
mutate(dct_32_ts = list(seq(
min(prices_coef$dct_32_ts),
max(prices_coef$dct_32_ts)
))) %>%
unnest(dct_32_ts) %>%
bind_cols(predict(pull_workflow_fit(model), ., type = "prob"))
ts.32_imp %>%
ggplot(aes(dct_32_ts, .pred_increase)) +
geom_line()
ts.32_recon <- ts.32_imp %>%
reconstruct("ts", step, starts_with("dct_"))
set.seed(10)
ts.32_recon %>%
select(dct_32_ts, ts_recon, n) %>%
sample_n(10) %>%
unnest(c(ts_recon, n)) %>%
ggplot(aes(n, ts_recon, color = as.factor(dct_32_ts))) +
geom_line(show.legend = FALSE) +
facet_wrap(~dct_32_ts)
prices %>%
count(class) %>%
mutate(n / sum(n))
#> # A tibble: 2 x 3
#> class n `n/sum(n)`
#> <fct> <int> <dbl>
#> 1 no_increase 663 0.687
#> 2 increase 302 0.313