#titanic sink
#get train data
# Load the training set, reduce it to the predictor columns, encode the
# non-numeric columns as numbers, and fit the SVM classifier.
# Produces: train_data, train_data.pick, t_mean, svmmodel.
train_data <- read.csv('C:\\NotBackedUp\\fu\\temp\\Titanic Sink\\data\\train.csv')

# pick vip attributes: drop the columns that are not used as predictors
dropped_cols <- c("PassengerId", "Name", "Embarked", "Ticket", "Fare")
train_data.pick <- train_data[
  , setdiff(names(train_data), dropped_cols), drop = FALSE
]
head(train_data.pick)
names(train_data.pick)

## clean data
summary(train_data.pick$Survived) # 1 = yes, 0 = no
summary(train_data.pick$Pclass)

# Sex to number: male -> 1, anything else -> 2
summary(train_data.pick$Sex)
train_data.pick$nSex <- ifelse(train_data.pick$Sex == "male", 1, 2)
train_data.pick$Sex <- NULL
names(train_data.pick)
summary(train_data.pick$nSex)

# Age: replace missing values with the mean of the observed ages
summary(train_data.pick$Age)
t_mean <- mean(train_data.pick$Age, na.rm = TRUE)
train_data.pick$Age[is.na(train_data.pick$Age)] <- t_mean
summary(train_data.pick$Age)

summary(train_data.pick$SibSp)
summary(train_data.pick$Parch)

# Cabin to number: strip digits (and the literal text "NA"), keep the first
# remaining character and map it to its code point minus 64 ("A" -> 1,
# "B" -> 2, ...). Cabins without a leading letter are coded as 0.
summary(train_data.pick$Cabin)
cabin_text <- as.character(train_data.pick$Cabin)
deck_letter <- substr(gsub("\\d+|NA", "", cabin_text), 1, 1)
has_deck <- !is.na(deck_letter) & grepl("[[:alpha:]]", deck_letter)
nCabin <- rep(0, length(cabin_text))
nCabin[has_deck] <- vapply(
  deck_letter[has_deck], utf8ToInt, integer(1), USE.NAMES = FALSE
) - 64
train_data.pick$nCabin <- nCabin
train_data.pick$Cabin <- NULL

# SUPPORT VECTOR MACHINE
library(e1071) # svm

# learning from training
# `type` (not `method`) is the svm() argument that selects the machine type.
# probability = TRUE fits the extra model needed to predict class
# probabilities later; cross = 5 requests 5-fold cross-validation.
svmmodel <- svm(
  as.factor(Survived) ~ .,
  data = train_data.pick,
  type = "C-classification",
  kernel = "radial",
  cross = 5,
  probability = TRUE
)

# get test data
# Load the test set and apply the same column selection and numeric encoding
# that the training data receives, so the fitted model can score it.
# Produces: test_data, test_data.pick, t_mean.
test_data <- read.csv('C:\\NotBackedUp\\fu\\temp\\Titanic Sink\\data\\test.csv')

# pick vip attributes: drop the columns that are not used as predictors
dropped_cols <- c("PassengerId", "Name", "Embarked", "Ticket", "Fare")
test_data.pick <- test_data[
  , setdiff(names(test_data), dropped_cols), drop = FALSE
]
names(test_data.pick)

## clean data
# NOTE(review): presumably the test file carries no Survived column, so it is
# not summarised here -- confirm against the data.
summary(test_data.pick$Pclass)

# Sex to number: male -> 1, anything else -> 2
summary(test_data.pick$Sex)
test_data.pick$nSex <- ifelse(test_data.pick$Sex == "male", 1, 2)
test_data.pick$Sex <- NULL
names(test_data.pick)
summary(test_data.pick$nSex)

# Age: replace missing values with the mean of the observed test-set ages
summary(test_data.pick$Age)
t_mean <- mean(test_data.pick$Age, na.rm = TRUE)
test_data.pick$Age[is.na(test_data.pick$Age)] <- t_mean
summary(test_data.pick$Age)

summary(test_data.pick$SibSp)
summary(test_data.pick$Parch)

# Cabin to number: strip digits (and the literal text "NA"), keep the first
# remaining character and map it to its code point minus 64 ("A" -> 1,
# "B" -> 2, ...). Cabins without a leading letter are coded as 0.
summary(test_data.pick$Cabin)
cabin_text <- as.character(test_data.pick$Cabin)
deck_letter <- substr(gsub("\\d+|NA", "", cabin_text), 1, 1)
has_deck <- !is.na(deck_letter) & grepl("[[:alpha:]]", deck_letter)
nCabin <- rep(0, length(cabin_text))
nCabin[has_deck] <- vapply(
  deck_letter[has_deck], utf8ToInt, integer(1), USE.NAMES = FALSE
) - 64
test_data.pick$nCabin <- nCabin
test_data.pick$Cabin <- NULL
test_data.pick$nCabin

# predicting the test data
# Score the prepared test set with the fitted SVM, compare the predictions
# with the reference labels, run a ROC analysis and write the submission file.
# Reads: svmmodel, test_data, test_data.pick.
library(SDMTools) # confusion.matrix(), prop.correct()
# prediction() / performance() and the @y.values slot are the ROCR API.
library(ROCR)

# probability = TRUE returns per-class probabilities as an attribute; this
# relies on svmmodel having been fitted with probability = TRUE.
svmmodel.predict <- predict(
  svmmodel, test_data.pick,
  decision.values = TRUE, probability = TRUE
)
# Score for the positive class ("1" = survived), used for the ROC analysis.
svmmodel.probs <- attr(svmmodel.predict, "probabilities")[, "1"]
# Indexing drops the attributes and leaves the plain factor of predicted labels.
svmmodel.class <- svmmodel.predict[seq_len(nrow(test_data.pick))]

# analyzing result
truth <- read.csv('C:\\NotBackedUp\\fu\\temp\\Titanic Sink\\data\\gender_submission.csv')
# NOTE(review): assumes the rows of this file are in the same order as the
# test set -- confirm, or align on the passenger id.
svmmodel.truth <- truth$Survived
svmmodel.truth
svmmodel.class

# confusion
# confusion.matrix() thresholds numeric predictions, so convert the factor
# labels "0"/"1" to numbers first.
svmmodel.class.num <- as.numeric(as.character(svmmodel.class))
svmmodel.confusion <- confusion.matrix(svmmodel.truth, svmmodel.class.num)
svmmodel.accuracy <- prop.correct(svmmodel.confusion)

# recall = true positives / all observed positives.
# The matrix has predictions in rows and observations in columns.
confusion_counts <- unclass(svmmodel.confusion)
svmmodel.recall <- confusion_counts["1", "1"] / sum(confusion_counts[, "1"])
svmmodel.confusion

# roc analysis for test data
svmmodel.prediction <- prediction(svmmodel.probs, svmmodel.truth)
svmmodel.performance <- performance(svmmodel.prediction, "tpr", "fpr")
svmmodel.auc <- performance(svmmodel.prediction, "auc")@y.values[[1]]

# Submission: passenger id plus the predicted label.
final_result <- test_data["PassengerId"]
final_result$Survived <- svmmodel.class
write.csv(
  final_result,
  file = "C:\\NotBackedUp\\fu\\temp\\Titanic Sink\\data\\my_submission.csv",
  quote = FALSE,
  row.names = FALSE
)