apply, tapply, lapply, sapply, vapply, ftable, xtabs and aggregate are very important functions for data transformation; they are the basic data-processing functions in R. The script used in the lecture above is given below:
#Yogesh Mehla
#yogesh.mehla@gmail.com
#+91-9023262520
#apply, lapply, sapply, vapply, tapply, split, ftable, xtabs, aggregate
#generating matrices (cross-tab reports)
# Load the practice data set. The path is machine-specific; adjust it to
# wherever Rpractice.csv lives.
CricData <- read.csv("G:/R/Rpractice.csv")
CricData

# Print the class of every column as it was read from the CSV.
# seq_along() is used instead of 1:length() so that a data frame without
# columns produces zero iterations instead of looping over c(1, 0).
for (i in seq_along(CricData)) {
  print(class(CricData[[i]]))
}

# Columns 2, 3, 4 and 7 are converted to character.
for (i in c(2, 3, 4, 7)) {
  CricData[[i]] <- as.character(CricData[[i]])
}

# Print the classes again to confirm the conversion.
for (i in seq_along(CricData)) {
  print(class(CricData[[i]]))
}
# Report the mean of every numeric column and the class of every other
# column.
for (i in seq_along(CricData)) {
  # The column name is taken from the data frame itself: a single extracted
  # column is a plain vector, so colnames() on it returns NULL and the name
  # would be missing from the message.
  if (is.numeric(CricData[[i]])) {
    # mean() is called without na.rm, so a column containing NA reports NA.
    print(paste(
      "mean of column ", colnames(CricData)[i], " is ", mean(CricData[[i]])
    ))
  } else {
    print(paste(
      "Column ", colnames(CricData)[i], " is of type", class(CricData[[i]])
    ))
  }
}
# apply() calls a function on every row (MARGIN = 1) or every column
# (MARGIN = 2) of a matrix-like object. mtcars is all-numeric, so every
# column is reported as numeric.
apply(mtcars, 2, class)
apply(mtcars, 2, mean)
# The function is passed anonymously so that no helper object is left behind
# in the workspace. It prints one message per column and apply() returns the
# same messages as a character vector.
apply(mtcars, 2, function(x) {
  print(paste("mean is ", mean(x)))
})
# apply() converts a data frame to a matrix before calling FUN, and a matrix
# has a single type. Because CricData contains character columns, the class
# reported here is that of the coerced matrix columns, not of the original
# data frame columns.
apply(X = CricData, MARGIN = 2, FUN = class)
# The same call restricted to columns 5 and 6 only.
apply(X = CricData[, 5:6], MARGIN = 2, FUN = class)
# sapply() applies a function to every element of a list. The elements of a
# data frame are its columns, so sapply() has no MARGIN argument.
# mean() is attempted on every column here, including the character ones,
# for which it returns NA with a warning.
# The function is passed anonymously so that no helper object is left behind
# in the workspace.
sapply(CricData, function(x) {
  print(paste("mean is ", mean(x)))
})
# Report the mean of every numeric column and the class of every other
# column. The iteration runs over the column names rather than the columns:
# inside the function a column is a plain vector, so colnames() on it
# returns NULL and the name would be missing from the message.
# The result is still a character vector named after the columns.
sapply(names(CricData), function(column_name) {
  x <- CricData[[column_name]]
  if (is.numeric(x)) {
    print(paste("mean of column ", column_name, " is ", mean(x)))
  } else {
    print(paste("Column ", column_name, " is of type", class(x)))
  }
})
# Column means of columns 5 and 6. A column that contains NA has mean NA.
apply(CricData[5:6], 2, mean)
# fix() opens an interactive data editor, so it is only called when R runs
# interactively; a batch run (Rscript) skips it instead of failing.
# NOTE(review): presumably used to inspect or edit the NA cells by hand.
if (interactive()) {
  fix(CricData)
}
# Arguments given after FUN are forwarded to it, so na.rm = TRUE reaches
# mean() and missing values are ignored.
apply(CricData[5:6], 2, mean, na.rm = TRUE)
# lapply() also works column by column on a data frame (there is no MARGIN
# argument) and always returns a list, one element per column.
lapply(X = CricData[, 5:6], FUN = mean, na.rm = TRUE)
# unlist() flattens that list into a named vector.
unlist(
  lapply(X = CricData[, 5:6], FUN = mean, na.rm = TRUE)
)
# Split CricData into one data frame per franchise using lapply().
CricData$Franchise <- as.character(CricData$Franchise)
# Distinct franchises, in order of first appearance in the data.
UniqueFranchise <- unique(CricData$Franchise)
UniqueFranchise
# First attempt: the result is an unnamed list, so its elements can only be
# reached by position. The function is passed anonymously so that no helper
# object is left behind in the workspace.
OutputList <- lapply(UniqueFranchise, function(x) {
  CricData[CricData$Franchise == x, ]
})
OutputList[1]
# Second attempt: lapply() keeps the names of its input, so iterating over a
# self-named vector yields a list whose elements are named by franchise.
OutputList <- lapply(setNames(UniqueFranchise, UniqueFranchise), function(x) {
  CricData[CricData$Franchise == x, ]
})
OutputList$Jaipur
# split() builds the same kind of per-franchise list in a single call; its
# elements are ordered by the sorted factor levels of the grouping vector.
OutputSplit <- split(CricData, CricData$Franchise)
OutputSplit$Delhi
class(OutputList)
class(OutputSplit)
# unsplit() reassembles the original data frame from the pieces.
unsplit(OutputSplit, CricData$Franchise)
# unsplit() pairs list elements with groups by position, in factor-level
# order. OutputList was built in order of first appearance, so it is
# reordered by level name first; otherwise pieces could be written back into
# the rows of a different franchise.
unsplit(OutputList[levels(factor(CricData$Franchise))], CricData$Franchise)
# vapply() works like sapply() but the type and length of each result must
# be declared up front (here: one numeric value per column).
vapply(X = CricData[, 5:6], FUN = mean, FUN.VALUE = numeric(1))
# tapply() groups the values of X by the factor(s) given in INDEX and applies
# FUN to each group; the result is an array with one dimension per factor.
# Total price per specialty.
tapply(X = CricData$Price, INDEX = CricData$Specialty, FUN = sum)
# Total price per franchise and specialty (dimensions are unlabelled).
tapply(
  X = CricData$Price,
  INDEX = list(CricData$Franchise, CricData$Specialty),
  FUN = sum
)
# Same table, with the two dimensions labelled.
tapply(
  X = CricData$Price,
  INDEX = list(Franchise = CricData$Franchise, Specialty = CricData$Specialty),
  FUN = sum
)
# Rows sorted by franchise.
CricData[order(CricData$Franchise), ]
# Rows sorted by franchise, then by specialty within each franchise.
CricData[order(CricData$Franchise, CricData$Specialty), ]
# Franchise descending, specialty ascending: the franchise is turned into its
# factor codes and negated so that a single ascending sort reverses it.
CricData[
  order(-as.integer(factor(CricData$Franchise)), CricData$Specialty),
]
# tapply() with three grouping factors yields a three-dimensional array:
# Country x Specialty x GFranchise, each cell holding the summed price.
outputTapply <- tapply(
  X = CricData$Price,
  INDEX = list(
    Country = CricData$Country,
    Specialty = CricData$Specialty,
    GFranchise = CricData$Franchise
  ),
  FUN = sum
)
outputTapply
# One franchise's slice, selected by name; the row and column ranges are
# fixed, so the array must have at least that many countries and specialties.
outputTapply[seq_len(10), seq_len(5), "Mumbai"]
dt <- as.data.frame(outputTapply[seq_len(8), seq_len(5), "Mumbai"])
dt
# A slice can also be selected by position along the third dimension.
outputTapply[, , 3]
# Number of slices along the third (franchise) dimension.
dim(outputTapply)[3]
# Break the 3-D array into a list with one Country x Specialty table per
# franchise. seq_len() is used so that an empty third dimension gives an
# empty list instead of iterating over c(1, 0).
d <- lapply(seq_len(dim(outputTapply)[3]), function(x) {
  outputTapply[, , x]
})
d
# Same list, but named. The names are taken from the array's own dimnames:
# tapply() orders the third dimension by sorted factor level, which need not
# match the first-appearance order of unique(CricData$Franchise), so taking
# the labels from anywhere else could attach a name to the wrong slice.
d <- lapply(setNames(nm = dimnames(outputTapply)[[3]]), function(x) {
  outputTapply[, , x]
})
d$Bangalore
# ftable() flattens a multi-dimensional table into a two-dimensional layout.
ftable(outputTapply)
# xtabs() builds a cross-tabulated sum from a formula: the left-hand side is
# summed within every combination of the right-hand-side factors. Columns are
# referred to by bare name and looked up in `data`, which also gives the
# result clean dimension labels (Country, Franchise, Specialty).
OutputxTab <- xtabs(Price ~ Country + Franchise + Specialty, data = CricData)
OutputxTab
# The equivalent table built with tapply(), using the same dimension order.
# tapply() leaves combinations without data as NA, whereas xtabs() reports 0.
OutputtApplyXTab <- tapply(
  CricData$Price,
  list(
    Country = CricData$Country,
    GFranchise = CricData$Franchise,
    Specialty = CricData$Specialty
  ),
  sum
)
ftable(OutputtApplyXTab)
ftable(OutputxTab)
# aggregate() summarises a variable per group and returns a data frame with
# one row per group combination that occurs in the data.
# List interface: the grouping list is named so the output columns are called
# Franchise and Specialty instead of Group.1 and Group.2; na.rm = TRUE is
# forwarded to mean().
aggregate(
  CricData$Price,
  by = list(Franchise = CricData$Franchise, Specialty = CricData$Specialty),
  FUN = mean,
  na.rm = TRUE
)
# Formula interface: columns are referred to by bare name and looked up in
# `data`, which also gives the output columns their plain names.
aggregate(Price ~ Country + Franchise + Specialty, data = CricData, FUN = sum)