## Exploratory look at cancer incidence in the course dataset.
##
## Loads the data, prints the overall and per-sex proportion of cancer
## patients, and writes two small scatter plots of age against cancer status.
## The code below expects the table to provide columns named `cancer`, `sex`
## and `age`; header detection is left to read.table's defaults.
Cancer <- read.table("http://www.cse.chalmers.se/~chrdimi/downloads/fouille/cancer.txt")
## Short link to the same file: http://bit.ly/1OvaqCP

## Let us count the incidence of cancer in our dataset
nData <- nrow(Cancer)
nCancer <- sum(Cancer$cancer == 1)
pCancer <- nCancer / nData
print("Proportion of cancer patients")
print(pCancer)

## First, count how many males and females there are
nMales <- sum(Cancer$sex == "male")
nFemales <- sum(Cancer$sex == "female")

## Then, count how many males have cancer
maleCancer <- sum(Cancer$cancer[Cancer$sex == "male"])
## ... and how many females have cancer
femaleCancer <- sum(Cancer$cancer[Cancer$sex == "female"])

## The per-sex incidences must be computed BEFORE they are printed; printing
## first fails with "object 'pCancerMale' not found" in a fresh session.
pCancerMale <- maleCancer / nMales
pCancerFemale <- femaleCancer / nFemales
print("Male cancer incidence")
print(pCancerMale)
print("Female cancer incidence")
print(pCancerFemale)

## Recombine the per-sex counts. This matches the first estimate only if every
## row is labelled "male" or "female" and `cancer` is coded 0/1 -- presumably
## the case for this dataset; verify against the data.
pCancer <- (maleCancer + femaleCancer) / (nMales + nFemales)
print("Overall cancer incidence (same as before)")
print(pCancer)

## Select only the data pertaining to age and cancer
X <- Cancer[c("age", "cancer")]

## Example of how we could perhaps classify by age: first 10 patients
pdf("agePlot1.pdf", width = 4, height = 4)
plot(X[1:10, ])
dev.off()

## Same plot with the first 20 patients
pdf("agePlot2.pdf", width = 4, height = 4)
plot(X[1:20, ])
dev.off()

## example of how we can actually perhaps classify by age
##plot(X[1:20,])