Send output to:
Browser Blue - Charts White
Browser Black/White
CSV
Data X:
279055 73 96 130 212408 75 75 143 233939 83 70 118 222117 106 134 146 179751 55 72 73 70849 28 8 89 605767 135 173 146 33186 19 1 22 227332 62 88 132 258874 48 98 92 359064 120 112 147 264989 131 125 203 212638 87 57 113 368577 85 139 171 269455 88 87 87 397992 190 176 208 335567 76 114 153 428322 172 121 97 182016 58 103 95 267365 89 135 197 279428 73 123 160 508849 111 99 148 206722 47 74 84 200004 58 103 227 257139 133 158 154 270941 138 116 151 324969 134 114 142 329962 92 150 148 190867 60 64 110 393860 79 150 149 327660 89 143 179 269239 83 50 149 391045 105 145 187 130446 49 56 153 430118 104 141 163 273950 56 83 127 428077 128 112 151 254312 93 79 100 120351 35 33 46 395643 211 152 156 345875 86 126 128 216827 82 97 111 224524 83 84 119 182485 69 68 148 157164 85 50 65 459455 157 101 134 78800 42 20 66 217932 84 101 201 368086 123 150 177 230299 70 129 156 244782 81 99 158 24188 24 8 7 400109 334 88 175 65029 17 21 61 101097 64 30 41 309810 67 102 133 369627 90 163 228 367127 204 132 140 377704 154 161 155 280106 90 90 141 400971 153 160 181 315924 122 139 75 291391 124 104 97 295075 93 103 142 280018 81 66 136 267432 71 163 87 217181 141 93 140 258166 159 85 169 260919 87 150 129 182961 73 143 92 256967 74 107 160 73566 32 22 67 272362 93 85 179 229056 62 101 90 229851 70 131 144 371391 91 140 144 398210 104 156 144 220419 111 81 134 231884 72 137 146 217714 72 102 121 200046 53 72 112 483074 131 161 145 146100 72 30 99 295224 109 120 96 80953 25 49 27 217384 63 121 77 179344 62 76 137 415550 221 85 151 389059 129 151 126 180679 106 165 159 299505 104 89 101 292260 84 168 144 199481 68 48 102 282361 78 149 135 329281 89 75 147 234577 48 107 155 297995 67 116 138 329583 89 173 113 416463 163 155 248 415683 119 165 116 297080 142 121 176 318283 70 156 140 224033 199 86 59 43287 14 13 64 238089 87 120 40 263322 160 117 98 299566 60 133 139 321797 95 169 135 193926 95 39 97 175138 105 125 142 354041 78 82 155 303273 91 148 115 23668 13 12 0 196743 79 146 103 61857 25 23 30 217543 54 87 130 440711 128 164 102 21054 16 4 0 252805 52 81 77 31961 22 18 9 360436 125 118 150 251948 77 76 163 187003 96 55 148 180842 58 62 94 38214 34 16 21 280392 56 98 151 358276 84 137 187 211775 67 50 171 447335 90 152 170 348017 99 163 145 441946 133 142 198 215177 43 80 152 130177 47 59 112 316128 363 94 173 466139 198 128 177 162279 62 63 153 416643 140 127 161 178322 86 60 115 292443 54 118 147 283913 100 110 124 244802 126 45 57 387072 125 96 144 246963 92 128 126 173260 63 41 78 346748 108 146 153 176654 59 147 196 268189 95 121 130 314070 112 185 159 1 0 0 0 14688 10 4 0 98 1 0 0 455 2 0 0 0 0 0 0 0 0 0 0 291650 94 85 94 415421 168 164 129 0 0 0 0 203 4 0 0 7199 5 7 0 46660 20 12 13 17547 5 0 4 121550 46 37 89 969 2 0 0 242774 75 62 71
Names of X columns:
Tijd_RFC #Logins #Blogs #Reviews+120tekens
Endogenous Variable (Column Number)
Categorization
equal
none
quantiles
hclust
equal
Number of categories (only if categorization<>none)
Cross-Validation? (only if categorization<>none)
no
no
yes
Chart options
R Code
library(party) library(Hmisc) par1 <- as.numeric(par1) par3 <- as.numeric(par3) x <- data.frame(t(y)) is.data.frame(x) x <- x[!is.na(x[,par1]),] k <- length(x[1,]) n <- length(x[,1]) colnames(x)[par1] x[,par1] if (par2 == 'kmeans') { cl <- kmeans(x[,par1], par3) print(cl) clm <- matrix(cbind(cl$centers,1:par3),ncol=2) clm <- clm[sort.list(clm[,1]),] for (i in 1:par3) { cl$cluster[cl$cluster==clm[i,2]] <- paste('C',i,sep='') } cl$cluster <- as.factor(cl$cluster) print(cl$cluster) x[,par1] <- cl$cluster } if (par2 == 'quantiles') { x[,par1] <- cut2(x[,par1],g=par3) } if (par2 == 'hclust') { hc <- hclust(dist(x[,par1])^2, 'cen') print(hc) memb <- cutree(hc, k = par3) dum <- c(mean(x[memb==1,par1])) for (i in 2:par3) { dum <- c(dum, mean(x[memb==i,par1])) } hcm <- matrix(cbind(dum,1:par3),ncol=2) hcm <- hcm[sort.list(hcm[,1]),] for (i in 1:par3) { memb[memb==hcm[i,2]] <- paste('C',i,sep='') } memb <- as.factor(memb) print(memb) x[,par1] <- memb } if (par2=='equal') { ed <- cut(as.numeric(x[,par1]),par3,labels=paste('C',1:par3,sep='')) x[,par1] <- as.factor(ed) } table(x[,par1]) colnames(x) colnames(x)[par1] x[,par1] if (par2 == 'none') { m <- ctree(as.formula(paste(colnames(x)[par1],' ~ .',sep='')),data = x) } load(file='createtable') if (par2 != 'none') { m <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data = x) if (par4=='yes') { a<-table.start() a<-table.row.start(a) a<-table.element(a,'10-Fold Cross Validation',3+2*par3,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'',1,TRUE) a<-table.element(a,'Prediction (training)',par3+1,TRUE) a<-table.element(a,'Prediction (testing)',par3+1,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'Actual',1,TRUE) for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) a<-table.element(a,'CV',1,TRUE) for (jjj in 1:par3) a<-table.element(a,paste('C',jjj,sep=''),1,TRUE) a<-table.element(a,'CV',1,TRUE) a<-table.row.end(a) for (i in 1:10) { ind <- sample(2, nrow(x), replace=T, prob=c(0.9,0.1)) m.ct <- ctree(as.formula(paste('as.factor(',colnames(x)[par1],') ~ .',sep='')),data =x[ind==1,]) if (i==1) { m.ct.i.pred <- predict(m.ct, newdata=x[ind==1,]) m.ct.i.actu <- x[ind==1,par1] m.ct.x.pred <- predict(m.ct, newdata=x[ind==2,]) m.ct.x.actu <- x[ind==2,par1] } else { m.ct.i.pred <- c(m.ct.i.pred,predict(m.ct, newdata=x[ind==1,])) m.ct.i.actu <- c(m.ct.i.actu,x[ind==1,par1]) m.ct.x.pred <- c(m.ct.x.pred,predict(m.ct, newdata=x[ind==2,])) m.ct.x.actu <- c(m.ct.x.actu,x[ind==2,par1]) } } print(m.ct.i.tab <- table(m.ct.i.actu,m.ct.i.pred)) numer <- 0 for (i in 1:par3) { print(m.ct.i.tab[i,i] / sum(m.ct.i.tab[i,])) numer <- numer + m.ct.i.tab[i,i] } print(m.ct.i.cp <- numer / sum(m.ct.i.tab)) print(m.ct.x.tab <- table(m.ct.x.actu,m.ct.x.pred)) numer <- 0 for (i in 1:par3) { print(m.ct.x.tab[i,i] / sum(m.ct.x.tab[i,])) numer <- numer + m.ct.x.tab[i,i] } print(m.ct.x.cp <- numer / sum(m.ct.x.tab)) for (i in 1:par3) { a<-table.row.start(a) a<-table.element(a,paste('C',i,sep=''),1,TRUE) for (jjj in 1:par3) a<-table.element(a,m.ct.i.tab[i,jjj]) a<-table.element(a,round(m.ct.i.tab[i,i]/sum(m.ct.i.tab[i,]),4)) for (jjj in 1:par3) a<-table.element(a,m.ct.x.tab[i,jjj]) a<-table.element(a,round(m.ct.x.tab[i,i]/sum(m.ct.x.tab[i,]),4)) a<-table.row.end(a) } a<-table.row.start(a) a<-table.element(a,'Overall',1,TRUE) for (jjj in 1:par3) a<-table.element(a,'-') a<-table.element(a,round(m.ct.i.cp,4)) for (jjj in 1:par3) a<-table.element(a,'-') a<-table.element(a,round(m.ct.x.cp,4)) a<-table.row.end(a) a<-table.end(a) table.save(a,file='mytable3.tab') } } m bitmap(file='test1.png') plot(m) dev.off() bitmap(file='test1a.png') plot(x[,par1] ~ as.factor(where(m)),main='Response by Terminal Node',xlab='Terminal Node',ylab='Response') dev.off() if (par2 == 'none') { forec <- predict(m) result <- as.data.frame(cbind(x[,par1],forec,x[,par1]-forec)) colnames(result) <- c('Actuals','Forecasts','Residuals') print(result) } if (par2 != 'none') { print(cbind(as.factor(x[,par1]),predict(m))) myt <- table(as.factor(x[,par1]),predict(m)) print(myt) } bitmap(file='test2.png') if(par2=='none') { op <- par(mfrow=c(2,2)) plot(density(result$Actuals),main='Kernel Density Plot of Actuals') plot(density(result$Residuals),main='Kernel Density Plot of Residuals') plot(result$Forecasts,result$Actuals,main='Actuals versus Predictions',xlab='Predictions',ylab='Actuals') plot(density(result$Forecasts),main='Kernel Density Plot of Predictions') par(op) } if(par2!='none') { plot(myt,main='Confusion Matrix',xlab='Actual',ylab='Predicted') } dev.off() if (par2 == 'none') { detcoef <- cor(result$Forecasts,result$Actuals) a<-table.start() a<-table.row.start(a) a<-table.element(a,'Goodness of Fit',2,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'Correlation',1,TRUE) a<-table.element(a,round(detcoef,4)) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'R-squared',1,TRUE) a<-table.element(a,round(detcoef*detcoef,4)) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'RMSE',1,TRUE) a<-table.element(a,round(sqrt(mean((result$Residuals)^2)),4)) a<-table.row.end(a) a<-table.end(a) table.save(a,file='mytable1.tab') a<-table.start() a<-table.row.start(a) a<-table.element(a,'Actuals, Predictions, and Residuals',4,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'#',header=TRUE) a<-table.element(a,'Actuals',header=TRUE) a<-table.element(a,'Forecasts',header=TRUE) a<-table.element(a,'Residuals',header=TRUE) a<-table.row.end(a) for (i in 1:length(result$Actuals)) { a<-table.row.start(a) a<-table.element(a,i,header=TRUE) a<-table.element(a,result$Actuals[i]) a<-table.element(a,result$Forecasts[i]) a<-table.element(a,result$Residuals[i]) a<-table.row.end(a) } a<-table.end(a) table.save(a,file='mytable.tab') } if (par2 != 'none') { a<-table.start() a<-table.row.start(a) a<-table.element(a,'Confusion Matrix (predicted in columns / actuals in rows)',par3+1,TRUE) a<-table.row.end(a) a<-table.row.start(a) a<-table.element(a,'',1,TRUE) for (i in 1:par3) { a<-table.element(a,paste('C',i,sep=''),1,TRUE) } a<-table.row.end(a) for (i in 1:par3) { a<-table.row.start(a) a<-table.element(a,paste('C',i,sep=''),1,TRUE) for (j in 1:par3) { a<-table.element(a,myt[i,j]) } a<-table.row.end(a) } a<-table.end(a) table.save(a,file='mytable2.tab') }
Compute
Summary of computational transaction
Raw Input
view raw input (R code)
Raw Output
view raw output of R engine
Computing time
1 seconds
R Server
Big Analytics Cloud Computing Center
Click here to blog (archive) this computation