Send output to:
Browser Blue - Charts White
Browser Black/White
CSV
Data X:
278691 71 494 3 96 197623 70 477 4 75 233139 78 701 16 70 221690 106 1150 2 134 177540 53 395 1 69 70849 28 179 3 8 566234 130 2446 0 169 33186 19 111 0 1 226511 61 756 7 87 245577 46 638 0 92 317443 114 831 0 96 248379 124 706 7 119 200620 79 749 10 57 367785 82 1184 4 139 266325 87 717 10 87 394271 183 1744 0 176 335567 76 845 8 114 407650 168 1360 4 119 182016 57 514 3 103 267365 88 692 8 135 279428 72 847 0 123 484574 105 1369 1 89 196721 43 494 5 74 197899 56 628 9 103 256290 131 1364 1 158 255126 132 1067 0 113 281816 131 1112 5 100 278027 89 636 0 117 173134 55 610 0 59 382760 76 1195 0 142 302413 83 1021 3 136 251255 81 724 6 50 355456 96 868 1 141 109546 45 374 4 46 427915 102 1290 4 141 273950 56 1186 0 83 427825 124 1344 0 112 247287 88 673 2 79 115658 33 284 1 33 386534 205 1299 2 149 340132 83 1546 10 126 194127 71 715 10 80 213258 79 676 5 84 182398 65 487 6 68 157164 84 1051 1 50 457592 153 2084 2 101 78800 42 330 2 20 213831 81 683 0 101 368086 122 1410 10 150 203104 62 1004 3 115 244371 77 688 0 98 24188 24 218 0 8 399093 331 862 8 88 65029 17 255 5 21 101097 64 454 3 30 297973 61 1185 1 97 352671 88 728 5 149 367083 203 1207 6 132 371178 148 1089 0 161 269973 88 884 12 89 389761 149 1332 10 160 315924 121 1190 12 139 285807 121 1233 11 104 282351 90 987 8 99 250558 73 619 3 56 265696 71 528 0 163 215915 138 651 6 93 247349 154 862 10 85 260919 86 913 2 150 182308 71 635 5 143 256761 72 898 13 107 73566 32 385 6 22 263796 89 767 7 85 207899 56 818 2 85 228779 67 779 4 131 363571 89 997 4 140 382785 98 1233 3 152 220401 109 586 6 81 225097 67 751 2 136 215445 68 734 0 102 188786 49 711 1 69 481148 130 1268 1 161 145943 70 653 5 30 292287 108 700 2 120 80953 25 437 0 49 164260 57 863 0 62 179344 61 459 6 76 413462 220 1580 1 85 358697 125 1024 0 146 180679 106 1051 1 165 298696 102 843 1 89 288706 82 725 3 167 197956 66 621 10 48 282361 77 1128 1 149 329202 88 969 4 75 221875 44 669 5 100 277071 63 674 5 114 305984 87 820 0 165 416032 161 1366 12 155 412530 116 1493 13 165 297080 141 893 9 121 318235 68 900 0 156 200486 194 704 0 79 43287 14 214 4 13 189520 84 733 4 89 255152 153 855 0 110 288617 56 1251 0 129 314167 93 1069 0 169 170268 86 426 0 28 164399 99 954 0 118 350667 76 674 5 82 303273 90 696 0 148 23623 11 156 0 12 195849 74 779 0 146 61857 25 192 4 23 184709 52 546 0 83 428191 121 1213 1 163 21054 16 146 0 4 252805 52 866 5 81 31961 22 200 0 18 351541 122 1336 3 118 246359 71 726 7 76 187003 95 522 14 55 172442 56 714 3 57 38214 34 276 0 16 241539 48 789 3 88 358276 83 1031 0 137 209821 64 502 0 50 441447 86 1678 4 147 348017 99 884 0 163 439634 131 1200 3 142 208962 40 547 0 77 105332 44 422 0 42 311111 355 995 4 94 460033 192 1554 5 126 159057 58 563 16 63 411980 136 1811 5 127 173486 80 749 5 59 284582 51 648 2 117 283913 100 905 1 110 234203 120 659 0 44 386740 123 1611 9 95 246963 92 811 1 128 173260 63 716 3 41 346730 107 1034 11 146 176654 58 732 5 147 259048 90 1033 2 119 312540 111 850 1 185 1 0 0 9 0 14688 10 85 0 4 98 1 0 0 0 455 2 0 0 0 0 0 0 1 0 0 0 0 0 0 283222 91 806 2 85 409280 163 1128 3 157 0 0 0 0 0 203 4 0 0 0 7199 5 74 0 7 46660 20 259 0 12 17547 5 69 0 0 121550 46 309 0 37 969 2 0 0 0 242228 73 687 2 62
Names of X columns:
Tijd_RFC #Logins #Gedeelde_Compendia #Blogs #Reviews+120tekens
Endogenous Variable (Column Number)
Categorization
none
none
quantiles
hclust
equal
Number of categories (only if categorization<>none)
Cross-Validation? (only if categorization<>none)
no
no
yes
Chart options
R Code
library(party) library(Hmisc) par1 <- as.numeric(par1) par3 <- as.numeric(par3) x <- data.frame(t(y)) is.data.frame(x) x <- x[!is.na(x[,par1]),] k <- length(x[1,]) n <- length(x[,1]) colnames(x)[par1] x[,par1] if (par2 == 'kmeans') { cl <- kmeans(x[,par1], par3) print(cl) clm <- matrix(cbind(cl$centers,1:par3),ncol=2) clm <- clm[sort.list(clm[,1]),] for (i in 1:par3) { cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') } cl$cluster <- as.factor(cl$cluster) print(cl$cluster) x[,par1] <- cl$cluster } if (par2 == 'quantiles') { x[,par1] <- cut2(x[,par1],g=par3) } if (par2 == 'hclust') { hc <- hclust(dist(x[,par1])^2, 'cen') print(hc) memb <- cutree(hc, k = par3) dum <- c(mean(x[memb==1,par1])) for (i in 2:par3) { dum <- c(dum, mean(x[memb==i,par1])) } hcm <- matrix(cbind(dum,1:par3),ncol=2) hcm <- hcm[sort.list(hcm[,1]),] for (i in 1:par3) { memb[memb==hcm[i,2]] <- paste('C',i,sep='') } memb <- as.factor(memb) print(memb) x[,par1] <- memb } if (par2=='equal') { ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) x[,par1] <- as.factor(ed) } table(x[,par1]) colnames(x) colnames(x)[par1] x[,par1] if (par2 == 'none') { m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) } load(file='createtable') if (par2 != 'none') { m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) if (par4=='yes') { a<-table.start() a<-table.row.start(a) a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'',1,TRUE) a<-table.element(a,'Prediction (training)',par3+1,TRUE) a<-table.element(a,'Prediction (testing)',par3+1,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'Actual',1,TRUE) for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) a<-table.element(a,'CV',1,TRUE) for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) a<-table.element(a,'CV',1,TRUE) a<-table.row.end(a) for (i in 1:10) { ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) if (i==1) { m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) m.ct.i.actu <- x[ind==1,par1] m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) m.ct.x.actu <- x[ind==2,par1] } else { m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) } } print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) numer <- 0 for (i in 1:par3) { print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) numer <- numer + m.ct.i.tab[i,i] } print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) numer <- 0 for (i in 1:par3) { print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) numer <- numer + m.ct.x.tab[i,i] } print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) for (i in 1:par3) { a<-table.row.start(a) a<-table.element(a,paste('C',i,sep=''),1,TRUE) for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) a<-table.row.end(a) } a<-table.row.start(a) a<-table.element(a,'Overall',1,TRUE) for (jjj in 1:par3) a<-table.element(a,'-') a<-table.element(a,round(m.ct.i.cp,4)) for (jjj in 1:par3) a<-table.element(a,'-') a<-table.element(a,round(m.ct.x.cp,4)) a<-table.row.end(a) a<-table.end(a) table.save(a,file='mytable3.tab') } } m bitmap(file='test1.png') plot(m) dev.off() bitmap(file='test1a.png') plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') dev.off() if (par2 == 'none') { forec <- predict(m) result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) colnames(result) <- c('Actuals','Forecasts','Residuals') print(result) } if (par2 != 'none') { print(cbind(as.factor(x[,par1]),predict(m))) myt <- table(as.factor(x[,par1]),predict(m)) print(myt) } bitmap(file='test2.png') if(par2=='none') { op <- par(mfrow=c(2,2)) plot(density(result$Actuals),main='Kernel Density Plot of Actuals') plot(density(result$Residuals),main='Kernel Density Plot of Residuals') plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') par(op) } if(par2!='none') { plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') } dev.off() if (par2 == 'none') { detcoef <- cor(result$Forecasts,result$Actuals) a<-table.start() a<-table.row.start(a) a<-table.element(a,'Goodness of Fit',2,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'Correlation',1,TRUE) a<-table.element(a,round(detcoef,4)) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'R-squared',1,TRUE) a<-table.element(a,round(detcoef*detcoef,4)) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'RMSE',1,TRUE) a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) a<-table.row.end(a) a<-table.end(a) table.save(a,file='mytable1.tab') a<-table.start() a<-table.row.start(a) a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'#',header=TRUE) a<-table.element(a,'Actuals',header=TRUE) a<-table.element(a,'Forecasts',header=TRUE) a<-table.element(a,'Residuals',header=TRUE) a<-table.row.end(a) for (i in 1:length(result$Actuals)) { a<-table.row.start(a) a<-table.element(a,i,header=TRUE) a<-table.element(a,result$Actuals[i]) a<-table.element(a,result$Forecasts[i]) a<-table.element(a,result$Residuals[i]) a<-table.row.end(a) } a<-table.end(a) table.save(a,file='mytable.tab') } if (par2 != 'none') { a<-table.start() a<-table.row.start(a) a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'',1,TRUE) for (i in 1:par3) { a<-table.element(a,paste('C',i,sep=''),1,TRUE) } a<-table.row.end(a) for (i in 1:par3) { a<-table.row.start(a) a<-table.element(a,paste('C',i,sep=''),1,TRUE) for (j in 1:par3) { a<-table.element(a,myt[i,j]) } a<-table.row.end(a) } a<-table.end(a) table.save(a,file='mytable2.tab') }
Compute
Summary of computational transaction
Raw Input
view raw input (R code)
Raw Output
view raw output of R engine
Computing time
2 seconds
R Server
Big Analytics Cloud Computing Center
Click here to blog (archive) this computation