## ----setup, echo = FALSE, results = "hide", message = FALSE, warnings = FALSE----
suppressWarnings(RNGversion("3.5.2"))
pkgs <- c("partykit", "coin", "strucchange", "Formula", "survival",
          "sandwich", "party", "TH.data", "knitr")
pkgs <- sapply(pkgs, require, character.only = TRUE)
if (pkgs["party"]) detach(package:party)
set.seed(290875)

## ----fail, results = "asis", echo = FALSE---------------------------
if (any(!pkgs))
{
    cat(paste("Package(s)", paste(names(pkgs)[!pkgs], collapse = ", "), 
        "not available, stop processing.",
        "\\end{document}\n"))
    knitr::knit_exit()
}

## ----party-max, echo = TRUE, results = "hide"-----------------------
ctree_control(teststat = "max")

## ----party-quad, echo = TRUE, results = "hide"----------------------
ctree_control(teststat = "quad")

## ----party-Bonf, echo = TRUE, results = "hide"----------------------
ctree_control(testtype = "Bonferroni")

## ----party-minsplit, echo = TRUE, results = "hide"------------------
ctree_control(minsplit = 20)

## ----party-maxsurr, echo = TRUE, results = "hide"-------------------
ctree_control(maxsurrogate = 3)

## ----party-data, echo = TRUE----------------------------------------
ls <- data.frame(y = gl(3, 50, labels = c("A", "B", "C")), 
                 x1 = rnorm(150) + rep(c(1, 0, 0), c(50, 50, 50)), 
                 x2 = runif(150))

## ----party-formula, echo = TRUE, results = "hide"-------------------
library("partykit")
ctree(y ~ x1 + x2, data = ls)

## ----party-fitted, echo = TRUE--------------------------------------
ct <- ctree(y ~ x1 + x2, data = ls)

## ----party-print, echo = TRUE---------------------------------------
ct

## ----party-plot, echo = TRUE,  fig.height = 5, fig.width = 8--------
plot(ct)

## ----party-nodes, echo = TRUE---------------------------------------
ct[1]

## ----party-nodelist, echo = TRUE------------------------------------
class(ct[1])

## ----party-predict, echo = TRUE-------------------------------------
predict(ct, newdata = ls)

## ----party-treeresponse, echo = TRUE--------------------------------
predict(ct, newdata = ls[c(1, 51, 101),], type = "prob")

## ----party-where, echo = TRUE---------------------------------------
predict(ct, newdata = ls[c(1,51,101),], type = "node")

## ----party-sctest, echo = TRUE--------------------------------------
if (require("strucchange"))
    print(sctest(ct))

## ----treepipit-ctree, echo = TRUE-----------------------------------
data("treepipit", package = "coin")
tptree <- ctree(counts ~ ., data = treepipit)

## ----treepipit-plot, echo = TRUE,  fig.height = 5, fig.width = 8----
plot(tptree, terminal_panel = node_barplot)

## ----treepipit-x, echo = FALSE--------------------------------------
p <- info_node(node_party(tptree))$p.value
n <- table(predict(tptree, type = "node"))

## ----glaucoma-ctree, echo = TRUE------------------------------------
data("GlaucomaM", package = "TH.data")
gtree <- ctree(Class ~ ., data = GlaucomaM)

## ----glaucoma-x, echo = FALSE, results = "hide"---------------------
sp <- split_node(node_party(gtree))$varID

## ----glaucoma-plot, echo = FALSE,  fig.height = 6, fig.width = 10----
plot(gtree)

## ----glaucoma-plot-inner, echo = FALSE,  fig.height = 7, fig.width = 10----
plot(gtree, inner_panel = node_barplot, 
     edge_panel = function(...) invisible(), tnex = 1)

## ----glaucoma-plot2, echo = TRUE, eval = FALSE----------------------
# plot(gtree)

## ----glaucoma-plot-inner-bar, echo = TRUE, eval = FALSE-------------
# plot(gtree, inner_panel = node_barplot,
#      edge_panel = function(...) invisible(), tnex = 1)

## ----glaucoma-prediction, echo = TRUE-------------------------------
table(predict(gtree), GlaucomaM$Class)

## ----glaucoma-classprob, echo = TRUE,  fig.height = 4, fig.width = 5----
prob <- predict(gtree, type = "prob")[,1] + 
                runif(nrow(GlaucomaM), min = -0.01, max = 0.01)
splitvar <- character_split(split_node(node_party(gtree)), 
                            data = data_party(gtree))$name
plot(GlaucomaM[[splitvar]], prob, 
     pch = as.numeric(GlaucomaM$Class), ylab = "Conditional Class Prob.",
     xlab = splitvar)
abline(v = split_node(node_party(gtree))$breaks, lty = 2)
legend(0.15, 0.7, pch = 1:2, legend = levels(GlaucomaM$Class), bty = "n")

## ----GBSGS-ctree, echo = TRUE---------------------------------------
data("GBSG2", package = "TH.data")  
library("survival")
(stree <- ctree(Surv(time, cens) ~ ., data = GBSG2))

## ----GBSG2-plot, echo = TRUE,  fig.height = 6, fig.width = 10-------
plot(stree)

## ----GBSG2-KM, echo = TRUE------------------------------------------
pn <- predict(stree, newdata = GBSG2[1:2,], type = "node")
n <- predict(stree, type = "node")
survfit(Surv(time, cens) ~ 1, data = GBSG2, subset = (n == pn[1]))
survfit(Surv(time, cens) ~ 1, data = GBSG2, subset = (n == pn[2]))

## ----mammo-ctree, echo = TRUE---------------------------------------
data("mammoexp", package = "TH.data")
mtree <- ctree(ME ~ ., data = mammoexp)

## ----mammo-plot, echo = TRUE,  fig.height = 6, fig.width = 13-------
plot(mtree)

## ----spider-ctree, echo = TRUE--------------------------------------
data("HuntingSpiders", package = "partykit")
sptree <- ctree(arct.lute + pard.lugu + zora.spin + pard.nigr +
  pard.pull + aulo.albi + troc.terr + alop.cune + pard.mont + alop.acce +
  alop.fabr + arct.peri ~ herbs + reft + moss + sand + twigs + water,
  data = HuntingSpiders, teststat = "max", minsplit = 5,
  pargs = GenzBretz(abseps = .1, releps = .1))

## ----spider-plot1, echo = TRUE,  fig.height = 6, fig.width = 13-----
plot(sptree, terminal_panel = node_barplot)

## ----spider-plot2, echo = TRUE,  fig.width = 10, fig.height = 22----
plot(sptree)

## ----party-setup, echo = FALSE, results = "hide", message = FALSE, warnings = FALSE----
library("party")
set.seed(290875)

## ----party-airq, echo = TRUE----------------------------------------
data("airquality", package = "datasets")
airq <- subset(airquality, !is.na(Ozone))
(airct_party <- party::ctree(Ozone ~ ., data = airq, 
     controls = party::ctree_control(maxsurrogate = 3)))
mean((airq$Ozone - predict(airct_party))^2)

## ----partykit-airq, echo = TRUE-------------------------------------
(airct_partykit <- partykit::ctree(Ozone ~ ., data = airq, 
     control = partykit::ctree_control(maxsurrogate = 3, 
                                       numsurrogate = TRUE)))
mean((airq$Ozone - predict(airct_partykit))^2)
table(predict(airct_party, type = "node"),
      predict(airct_partykit, type = "node"))
max(abs(predict(airct_party) - predict(airct_partykit)))

## ----party-partykit-airq, echo = TRUE-------------------------------
airct_party@tree$criterion
info_node(node_party(airct_partykit))

## ----party-partykit-iris, echo = TRUE-------------------------------
(irisct_party <- party::ctree(Species ~ .,data = iris))
(irisct_partykit <- partykit::ctree(Species ~ .,data = iris, 
control = partykit::ctree_control(splitstat = "maximum")))
table(predict(irisct_party, type = "node"),
      predict(irisct_partykit, type = "node"))

## ----party-iris, echo = TRUE----------------------------------------
tr_party <- treeresponse(irisct_party, newdata = iris)

## ----partykit-iris, echo = TRUE-------------------------------------
tr_partykit <- predict(irisct_partykit, type = "prob", 
                       newdata = iris)
max(abs(do.call("rbind", tr_party) - tr_partykit))

## ----party-partykit-mammoexp, echo = TRUE---------------------------
### ordinal regression
data("mammoexp", package = "TH.data")
(mammoct_party <- party::ctree(ME ~ ., data = mammoexp))
### estimated class probabilities
tr_party <- treeresponse(mammoct_party, newdata = mammoexp)
(mammoct_partykit <- partykit::ctree(ME ~ ., data = mammoexp))
### estimated class probabilities
tr_partykit <- predict(mammoct_partykit, newdata = mammoexp, type = "prob")
max(abs(do.call("rbind", tr_party) - tr_partykit))

## ----party-partykit-GBSG2, echo = TRUE------------------------------
data("GBSG2", package = "TH.data")
(GBSG2ct_party <- party::ctree(Surv(time, cens) ~ .,data = GBSG2))
(GBSG2ct_partykit <- partykit::ctree(Surv(time, cens) ~ .,data = GBSG2))

## ----party-partykit-KM, echo = TRUE---------------------------------
tr_party <- treeresponse(GBSG2ct_party, newdata = GBSG2)
tr_partykit <- predict(GBSG2ct_partykit, newdata = GBSG2, type = "prob")
all.equal(lapply(tr_party, function(x) unclass(x)[!(names(x) %in% "call")]),
lapply(tr_partykit, function(x) unclass(x)[!(names(x) %in% "call")]),
check.names = FALSE)

## ----nf-alpha, echo = TRUE------------------------------------------
(airct_partykit_1 <- partykit::ctree(Ozone ~ ., data = airq, 
     control = partykit::ctree_control(maxsurrogate = 3, alpha = 0.001,
                                       numsurrogate = FALSE)))
depth(airct_partykit_1)
mean((airq$Ozone - predict(airct_partykit_1))^2)

## ----nf-maxstat, echo = TRUE----------------------------------------
(airct_partykit <- partykit::ctree(Ozone ~ ., data = airq, 
     control = partykit::ctree_control(maxsurrogate = 3, splittest = TRUE,
                                       testtype = "MonteCarlo"))) 

## ----nf-nmax, echo = TRUE-------------------------------------------
(irisct_partykit_1 <- partykit::ctree(Species ~ .,data = iris, 
control = partykit::ctree_control(splitstat = "maximum", nmax = 25)))
table(predict(irisct_partykit), predict(irisct_partykit_1))

## ----nf-multiway, echo = TRUE---------------------------------------
GBSG2$tgrade <- factor(GBSG2$tgrade, ordered = FALSE)
(GBSG2ct_partykit <- partykit::ctree(Surv(time, cens) ~ tgrade,
    data = GBSG2, control = partykit::ctree_control(multiway = TRUE, 
                                                    alpha = .5)))

## ----nf-cluster, echo = TRUE----------------------------------------
airq$month <- factor(airq$Month)
(airct_partykit_3 <- partykit::ctree(Ozone ~ Solar.R + Wind + Temp, data = airq,
cluster = month, control = partykit::ctree_control(maxsurrogate = 3)))
info_node(node_party(airct_partykit_3))
mean((airq$Ozone - predict(airct_partykit_3))^2)

## ----nf-ytrafo-1, echo = TRUE---------------------------------------
### with weight-dependent log-rank scores
### log-rank trafo for observations in this node only (= weights > 0)
h <- function(y, x, start = NULL, weights, offset, estfun = TRUE, object = FALSE, ...) {
    if (is.null(weights)) weights <- rep(1, NROW(y))
    s <- logrank_trafo(y[weights > 0,,drop = FALSE])
    r <- rep(0, length(weights))
    r[weights > 0] <- s
    list(estfun = matrix(as.double(r), ncol = 1), converged = TRUE, unweighted = TRUE)
}
partykit::ctree(Surv(time, cens) ~ ., data = GBSG2, ytrafo = h)

## ----nf-ytrafo-2, echo = TRUE---------------------------------------
### normal varying intercept / varying coefficient model (aka "mob")
h <- function(y, x, start = NULL, weights = NULL, offset = NULL, cluster = NULL, ...)
  glm(y ~ 0 + x, family = gaussian(), start = start, weights = weights, ...)
(airct_partykit_4 <- partykit::ctree(Ozone ~ Temp | Solar.R + Wind, 
    data = airq, cluster = month, ytrafo = h, 
    control = partykit::ctree_control(maxsurrogate = 3)))
airq$node <- factor(predict(airct_partykit_4, type = "node"))
summary(m <- glm(Ozone ~ node + node:Temp - 1, data = airq))
mean((predict(m) - airq$Ozone)^2)

## ----airq-mob, echo = TRUE------------------------------------------
airq_lmtree <- partykit::lmtree(Ozone ~ Temp | Solar.R + Wind, 
                                data = airq, cluster = month)
info_node(node_party(airq_lmtree))
mean((predict(airq_lmtree, newdata = airq) - airq$Ozone)^2)

## ----closing, echo = FALSE, results = "hide"------------------------
detach(package:party)

