## ----setup,cache=FALSE,echo=FALSE---------------------------------------------
current.file.basename <- "ensemble"

## ----common-setup,child='beamer.Rnw'------------------------------------------

## ----setupforbeamer,cache=FALSE,echo=FALSE,results='hide',message=FALSE-------
set.seed(0)
datadir <- paste("data-",current.file.basename,sep="")
## full size by default
## beamer sizes: 5.04x3.78
opts_chunk$set(echo=FALSE,
               cache.path=paste(".cache-",current.file.basename,"/",sep=""),
               fig.env='center',
               fig.width=4.5,fig.height=3.4,
               fig.path=paste("figures-",current.file.basename,"/",sep=""))
options(scipen = 5, OutDec=".")
beamerblue <- rgb(0.2,0.2,0.7)
beamergreen <- rgb(0,0.5,0)
beamerblue.alpha <- rgb(0.2,0.2,0.7,0.5)
apiacoa.colors <- list("darkblue"="#051F42",
                       "darkorange"="#E84017",
                       "darkgreen"="#3D6B34",
                       "blue"="#003075",
                       "lightblue"="#99BFBB",
                       "darkrose"="#997568")
palette(unlist(apiacoa.colors))
to_alpha <- function(color,alpha=0.5) {
  dacol <- as.vector(col2rgb(color))
  rgb(red=dacol[1]/255,green=dacol[2]/255,blue=dacol[3]/255,alpha=alpha)
}
## tool for multiple figure printing
hook_plot <- knit_hooks$get('plot') # the default hook
hook_plot_multi <- function(x, options) {
  txt = hook_plot(x, options)
  if (options$fig.cur <= 0) return(txt)
  # add \only before \includegraphics
  gsub('(\\\\includegraphics[^}]+} )', sprintf('\\\\only<%d>{\\1}%%', options$fig.cur), txt)
}
to_single_fig <- function() {
  knit_hooks$set(plot = hook_plot)
}
to_multi_fig <- function() {
  knit_hooks$set(plot = hook_plot_multi)
}
## standard packages always used
library(xtable)

## ----post-setup---------------------------------------------------------------
## local adaptation
opts_chunk$set(fig.height=3.25)

## ----packages,cache=FALSE,echo=FALSE,results='hide',message=FALSE,warning=FALSE----
library(doMC)
registerDoMC()
library(tidyverse)
library(adabag)
library(rpart.plot)

## -----------------------------------------------------------------------------
## risk of a majority vote over 21 independent classifiers, each with error p:
## the vote errs when at least 11 of them err, i.e. P(Binom(21, p) >= 11)
risk.majvot.ex1 <- 1-pbinom(10,size=21,p=0.45)
risk.majvot.ex2 <- 1-pbinom(10,size=21,p=0.3)

## ----warning=FALSE-------------------------------------------------------------
par(mar=c(4,4,2,0.1),cex=0.75)
theprobs <- seq(0.5,0.05,length.out=1+9*4)
plot(theprobs,1-pbinom(25,size=51,p=theprobs),type="l",col=4,xlab="Risk of one model",ylab="Risk of the majority vote")
lines(theprobs,1-pbinom(10,size=21,p=theprobs),col=1)
lines(theprobs,1-pbinom(5,size=11,p=theprobs),col=2)
lines(theprobs,1-pbinom(2,size=5,p=theprobs),col=3)
abline(h=seq(0.05,0.5,by=0.05),col="grey",lty=2)
abline(v=seq(0.05,0.5,by=0.05),col="grey",lty=2)
legend("bottomright",legend=paste("K=",c(51,21,11,5),sep=""),col=c(4,1,2,3),lwd=1,bg="white")

## -----------------------------------------------------------------------------
data(Sonar,package = "mlbench")

## ----SonarBoost,cache=TRUE-----------------------------------------------------
Sonar.boost <- boosting(Class~V11+V16,data=Sonar,coeflearn='Breiman',mfinal=50,boos=FALSE)

## ----fig.height=2---------------------------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,col=Sonar$Class,pch=20,xlab="V11",ylab="V16")
legend("topright",legend=levels(Sonar$Class),col=c(1,2),pch=20)

## ----predandco,cache=TRUE-------------------------------------------------------
## reconstruct, round by round, the observation weights and the individual tree
## predictions of the boosting run
weights <- matrix(NA,nrow=nrow(Sonar),ncol=length(Sonar.boost$weights))
predictions <- matrix(NA,nrow=nrow(Sonar),ncol=length(Sonar.boost$weights))
weights[,1] <- 1/nrow(Sonar)
for(k in 2:ncol(weights)) {
  predictions[,k-1] <- predict(Sonar.boost$trees[[k-1]])[,1]
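  ## AdaBoost-style reweighting step (reconstructing the observation weights
  ## used during boosting): an observation misclassified by tree k-1 gets its
  ## weight multiplied by exp(alpha_{k-1}), where alpha_{k-1} =
  ## Sonar.boost$weights[k-1] is that tree's vote weight; the weights are then
  ## renormalised so that they sum to one.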
  weights[,k] <- weights[,k-1]*exp(Sonar.boost$weights[k-1]*as.numeric(predict(Sonar.boost$trees[[k-1]],type="class")!=Sonar$Class))
  weights[,k] <- weights[,k]/sum(weights[,k])
}
predictions[,length(Sonar.boost$weights)] <- predict(Sonar.boost$trees[[length(Sonar.boost$weights)]])[,1]
## cum.predictions[,k] accumulates the weighted votes of the first k trees for
## the first class; cum.decision[k] is half of their total weight, i.e. the
## threshold of the weighted majority vote over those k trees
cum.predictions <- matrix(NA,nrow=nrow(Sonar),ncol=length(Sonar.boost$weights))
cum.predictions[,1] <- as.integer(predictions[,1]>0.5)*Sonar.boost$weights[1]
for(k in 2:ncol(weights)) {
  cum.predictions[,k] <- cum.predictions[,k-1]+as.integer(predictions[,k]>0.5)*Sonar.boost$weights[k]
}
cum.decision <- cumsum(Sonar.boost$weights)/2

## -----------------------------------------------------------------------------
V11.span <- seq(min(Sonar$V11),max(Sonar$V11),length.out=101)
V16.span <- seq(min(Sonar$V16),max(Sonar$V16),length.out=101)
## draw the 0.5 level curve of a single tree's predicted probability on a grid
add.decision <- function(tree,V11,V16) {
  the.grid <- expand.grid(V11=V11,V16=V16)
  contour(V11,V16,matrix(predict(tree,newdata=the.grid)[,1],nrow=length(V11)),drawlabels=FALSE,add=TRUE,levels=0.5,col=3)
}
my.greys <- rev(grey(seq(0,0.75,length.out=51),alpha=0.5))
## map a numeric vector to integer indices in 1:nb (used to pick grey levels)
to.ints <- function(x,nb) {
  x <- (x-min(x))/(max(x)-min(x))
  as.integer(1+x*(nb-1))
}
## draw the 0.5 level curve of the weighted majority vote of the first nb trees
add.cumdecision <- function(boost,nb,V11,V16) {
  the.grid <- expand.grid(V11=V11,V16=V16)
  cum.decision <- cumsum(boost$weights)/2
  predictions <- as.integer(predict(boost$trees[[1]],newdata=the.grid)[,1]>0.5)*boost$weights[1]
  for(k in 2:nb) {
    predictions <- predictions + as.integer(predict(boost$trees[[k]],newdata=the.grid)[,1]>0.5)*boost$weights[k]
  }
  contour(V11,V16,matrix(predictions/(2*cum.decision[nb]),nrow=length(V11)),drawlabels=FALSE,add=TRUE,levels=0.5,col=3)
}

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,col=Sonar$Class,pch=20,xlab="V11",ylab="V16")

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5)
prp(Sonar.boost$trees[[1]],roundint=FALSE)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,col=Sonar$Class,pch=20,xlab="V11",ylab="V16")
add.decision(Sonar.boost$trees[[1]],V11.span,V16.span)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5)
prp(Sonar.boost$trees[[1]],roundint=FALSE)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,col=as.integer(predictions[,1]<0.5)+1,pch=20,xlab="V11",ylab="V16")
add.decision(Sonar.boost$trees[[1]],V11.span,V16.span)
points(Sonar$V11,Sonar$V16,col=Sonar$Class)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5)
prp(Sonar.boost$trees[[1]],roundint=FALSE)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,pch=20,xlab="V11",ylab="V16",
     col=my.greys[to.ints(weights[,2],length(my.greys))])

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5)
prp(Sonar.boost$trees[[1]],roundint=FALSE)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,pch=20,xlab="V11",ylab="V16",
     col=Sonar$Class)
add.decision(Sonar.boost$trees[[2]],V11.span,V16.span)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5)
prp(Sonar.boost$trees[[2]],roundint=FALSE)
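## ----SonarBoostCheck-----------------------------------------------------------
## Illustrative sketch (an assumed chunk, not part of the original slides): the
## weighted majority vote accumulated by hand in cum.predictions can also be
## obtained directly from adabag's predict.boosting, which aggregates the 50
## trees with their vote weights.
Sonar.boost.pred <- predict(Sonar.boost, newdata = Sonar)
Sonar.boost.pred$confusion  # confusion matrix of the full weighted vote
Sonar.boost.pred$error      # corresponding resubstitution error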
## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,col=as.integer(predictions[,2]<0.5)+1,pch=20,xlab="V11",ylab="V16")
add.decision(Sonar.boost$trees[[2]],V11.span,V16.span)
points(Sonar$V11,Sonar$V16,col=Sonar$Class)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5)
prp(Sonar.boost$trees[[2]],roundint=FALSE)

## ----fig.width=2.2,fig.height=3-----------------------------------------------
par(cex=0.5,mar=c(4,4,0.1,0.1))
plot(Sonar$V11,Sonar$V16,col=as.integer(cum.predictions[,2]