# Programs used for graphics and tables of the book:
# "Data analysis and data mining" by A.Azzalini and B.Scarpa,
# © Oxford University Press, 2012 (ISBN 978-0-19-976710-6).
#
# Code regarding section 5.7 (© 2003, 2004, 2012 A.Azzalini and B.Scarpa)
#------------------------------------------------------------------------
# NOTE(review): pause(), matrice.confusione(), lift.roc(), pred.square(),
# detach.all() and the graphical settings col1, col2, pch1, cex0, Pch, Col
# are not defined in this file; presumably they are provided by base-www.R.
source("base-www.R")

#-----------------------------------
# Part 1: toy example, binary response y observed on a grid of x values
#
x <- seq(0.5, 3, length.out = 100)
y <- c(
  1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  1, 1, 1, 1, 1, 0, 0, 1, 0, 1,
  0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
  0, 0, 1, 1, 1, 0, 0, 0, 1, 1
)
#
name <- "figure 5.18"
plot(x, y, type = "n", ylim = c(0, 1))
points(x, y, col = 1, pch = 16, cex = 0.9)
pause(name)
#
library(tree)
# Classification tree of the 0/1 response on the single predictor x
t1 <- tree(factor(y) ~ x)
#
name <- "figure 5.19a"
plot(t1)
text(t1, digits = 2, pretty = 1)
pause(name)
#
# Evaluate the fitted tree on a finer grid of x values
x0 <- seq(0.5, 3, length.out = 500)
pr1 <- predict(t1, newdata = data.frame(x = x0))
name <- "figure 5.19b"
plot(x, y, type = "n", ylim = c(0, 1))
points(x, y, col = col2)
# Second column of the prediction matrix; presumably the fitted
# probability of y == 1 (second level of factor(y)) -- verify.
lines(x0, pr1[, 2])
pause(name)

#-----------------------------------
# Part 2: juice data
#
# stringsAsFactors = TRUE is stated explicitly: since R 4.0.0 read.table()
# no longer converts strings to factors by default, which is what this
# script was written for.
juice <- read.table("juice.data", header = TRUE, stringsAsFactors = TRUE)
juice[, "store"] <- factor(juice[, "store"])
v <- c(1, 3, 6:9, 13, 21)
juice <- juice[, v]
#--select training and test
set.seed(123)
n <- nrow(juice)
n1 <- round(n * 0.75)
n2 <- n - n1
permutation <- sample(seq_len(n), n)
training <- sort(permutation[seq_len(n1)])
#
juice1 <- juice[training, ]
juice2 <- juice[-training, ]
#-----------------------------------
#
# Split the training rows: 600 rows (part1) to grow the tree, the
# remaining training rows (part2) to evaluate the pruning sequence.
set.seed(123)
part1 <- sort(sample(training, 600))
part2 <- setdiff(training, part1)
# Response "choice" against every retained column except the first one
f1 <- as.formula(paste("choice~", paste(names(juice1)[-1], collapse = "+")))
# minsize = 2 and mindev = 0 remove the usual stopping rules, so the tree
# is grown as far as tree() allows before pruning.
t1 <- tree(f1, data = juice[part1, ],
           control = tree.control(nobs = length(part1), minsize = 2,
                                  mindev = 0))
t2 <- prune.tree(t1, newdata = juice[part2, ])
#
name <- "figure 5.20a"
plot(t2)
pause(name)
#
# Size with the smallest deviance on part2. Several sizes can tie for the
# minimum; min() then selects the smallest such tree so that `best`
# always receives a single value.
J <- min(t2$size[t2$dev == min(t2$dev)])
t3 <- prune.tree(t1, best = J)
#
name <- "figure 5.20b"
plot(t3)
text(t3)
pause(name)
#
# Predicted classes on the test set
p3 <- predict(t3, newdata = juice2, type = "class")
#
name <- "table 5.11"
matrice.confusione(p3, juice2[, "choice"])
pause(name)
#
# Second column of the prediction matrix; presumably the probability of
# class "MM" (second level of choice) -- verify against the data.
p3 <- predict(t3, newdata = juice2, type = "vector")[, 2]
a <- lift.roc(p3, as.numeric(juice2[, "choice"] == "MM"), type = "bin",
              plot.it = FALSE)
#
name <- "figure 5.21a"
plot(a[[1]], a[[2]], type = "b", xlab = "Fraction of predicted subjects",
     ylab = "Improvement factor", col = col1, pch = pch1, cex = cex0)
pause(name)
#---
name <- "figure 5.21b"
plot(a[[3]], a[[4]], type = "b", xlim = c(0, 1), pch = pch1,
     ylim = c(0, 1), cex = cex0,
     xlab = "1-specificity", ylab = "Sensibility", col = col1)
pause(name)
#
#--------------------------------------
# Part 3: three-class example with two predictors
#
dataset <- read.table("classes.dat", header = TRUE)
x1 <- dataset[, 1]
x2 <- dataset[, 2]
gr <- as.factor(dataset[, 3])
K <- 3  # number of classes; used for the array dimension and loops below
t3 <- tree(gr ~ x1 + x2,
           control = tree.control(nobs = nrow(dataset), mindev = 0,
                                  minsize = 2))
#
name <- "figure 5.22a"
plot(t3)
pause(name)
#
set.seed(5)
t4 <- cv.tree(t3)
#
name <- "figure 5.22b"
plot(t4)
pause(name)
#
t5 <- prune.tree(t3, k = 10)
#
name <- "figure 5.22c"
plot(t5)
text(t5)
pause(name)
#
n.grid <- 250
p <- pred.square(t5, x1, x2, n.grid)
# Reshape the predictions to an n.grid x n.grid x K array
pred <- array(p$pred, c(n.grid, n.grid, K))
# For every grid cell, order() sorts the K values increasingly; its last
# entry is therefore the index of the largest value in that cell.
ind <- apply(pred, c(1, 2), order)[K, , ]
#
name <- "figure 5.22d"
plot(dataset[, 1:2], type = "n",
     xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for (k in seq_len(K)) {
  points(dataset[gr == k, 1:2], pch = Pch[k], col = Col[k], cex = cex0)
}
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2, lty = 1)
text(-4, 1, labels = 1, cex = cex0 * 2)
text(3, 0, labels = 2, cex = cex0 * 2)
text(2, -4, labels = 3, cex = cex0 * 2)
pause(name)
#
detach(package:tree)
detach.all()