########################
# Session 2 R code
########################

########################
# R Packages
########################

# To list installed R packages on your computer
library()

# To install a new R package
install.packages("randomForest")

# To load an R package
library("randomForest")

# To list functions and datasets in a package
help(package = "randomForest")

# To list data sets in a package
data(package = "randomForest")
help(AirPassengers)

#######################
# Read in and Write to External Files
######################

# NOTE(review): machine-specific path; point it at the folder that holds
# RMR.txt and RMR.csv before running.
setwd("C:/Users/dzhao1/Desktop")

# Read a text file (first line holds the variable names)
RMR <- read.table("RMR.txt", header = TRUE)

# To view the data frame
edit(RMR)

# Read a .csv file
RMR2 <- read.csv("RMR.csv", header = TRUE)

# Write to a .csv file, without the row-name column
write.csv(RMR2, "RMRout.csv", row.names = FALSE)

#######################
# Data Management
######################
# Reference: http://www.ats.ucla.edu/stat/r/modules/subsetting.htm

# basic operations on data frames
dim(RMR)      # dimensions
names(RMR)    # variable names
head(RMR, 5)  # first 5 obs
tail(RMR, 4)  # last 4 obs

# or use edit function to view the data frame
edit(RMR)

#####
# Subset variables rmr and age
#####

# by using indices
# (assumes rmr and age are columns 2 and 4 of RMR -- TODO confirm)
RMR.1 <- RMR[, c(2, 4)]
edit(RMR.1)

# by using variable names
RMR.2 <- subset(RMR, select = c(rmr, age))
edit(RMR.2)

#####
# Subset observations (rows)
# eg, subset all athletes
#####

# by using indices
# (assumes the athletes are stored in rows 1 to 8 -- TODO confirm)
RMR.3 <- RMR[1:8, ]
edit(RMR.3)

# or
RMR.4 <- RMR[RMR$athlete == 1, ]
edit(RMR.4)

# or more naturally
RMR.5 <- subset(RMR, athlete == 1)
RMR.5

#####
# Subset observations (rows)
# eg, subset all athletes and age < 30
#####
RMR.6 <- subset(RMR, athlete == 1 & age < 30)
RMR.6

#####
# Subset observations (rows) and variables (columns)
# eg, subset all athletes and age < 30, keep variable rmr, athlete, age
#####
RMR.7 <- subset(RMR, athlete == 1 & age < 30, select = c(rmr, athlete, age))
RMR.7

#####
# Merge data frames
#####

# add a column of ID to RMR; the IDs follow the actual number of rows
# instead of a hard-coded count
RMR <- data.frame(ID = seq_len(nrow(RMR)), RMR)
RMR.8 <- subset(RMR, select = c(ID, athlete))
RMR.9 <- subset(RMR, select = c(ID, age))

# join the two subsets back together on the shared ID column
RMR.10 <- merge(RMR.8, RMR.9, by = "ID")

########################
# Conditional Execution
########################
x <- 2
if (x > 0) {
  u <- 1
  v <- x
} else {
  u <- 0
  v <- -x
}
u
v

# ifelse
u <- ifelse(x > 0, sqrt(x), NA)

########################
# Loops
########################
x <- 4:6

# vectorized version, for comparison with the loops below
y <- x^2
y
rm(y)

# for loops
y <- rep(NA, length(x))
for (i in seq_along(x)) {
  y[i] <- x[i]^2
}
y
rm(y)

# repeat
y <- rep(NA, length(x))
i <- 1
repeat {
  y[i] <- x[i]^2
  if (i == length(x)) {
    break
  }
  i <- i + 1
}
y
rm(y)

# while
y <- rep(NA, length(x))
i <- 1
while (i <= length(x)) {
  y[i] <- x[i]^2
  i <- i + 1
}
y

########################
# R Functions
########################

# to define the function
#
# Two-sample t test with a pooled variance estimate.
#
# Arguments:
#   y1, y2  the two samples
#   alpha   the confidence interval has level 1 - alpha (default .05)
#
# Returns a list with:
#   T         t statistic for mean(y1) - mean(y2)
#   p         two-sided p-value
#   CI        c(lower, upper) confidence limits for the mean difference
#   df        degrees of freedom, n1 + n2 - 2
#   yb1, yb2  the sample means
#   y1, y2    the input samples, passed through unchanged
#
# Stops with an error if alpha is not a single number strictly between
# 0 and 1, or if the two samples together have fewer than three values.
two.sample.t <- function(y1, y2, alpha = .05) {
  stopifnot(
    is.numeric(alpha), length(alpha) == 1L,
    alpha > 0, alpha < 1
  )

  n1 <- length(y1)
  n2 <- length(y2)
  df <- n1 + n2 - 2
  if (df < 1) {
    stop("y1 and y2 must contain at least three observations in total",
         call. = FALSE)
  }

  yb1 <- mean(y1)
  yb2 <- mean(y2)
  s1 <- var(y1)
  s2 <- var(y2)

  # pooled variance and the standard error of the mean difference
  s <- ((n1 - 1) * s1 + (n2 - 1) * s2) / df
  se <- sqrt(s * (1 / n1 + 1 / n2))

  # the statistic is kept in t_stat so the local name does not shadow
  # the T shorthand for TRUE; it is still returned as element "T"
  t_stat <- (yb1 - yb2) / se
  p <- 2 * pt(abs(t_stat), df, lower.tail = FALSE)

  half_width <- qt(1 - alpha / 2, df) * se
  LCI <- (yb1 - yb2) - half_width
  UCI <- (yb1 - yb2) + half_width
  CI <- c(LCI, UCI)

  list(T = t_stat, p = p, CI = CI, df = df,
       yb1 = yb1, yb2 = yb2, y1 = y1, y2 = y2)
}

# to apply the function
# (uses rows 1-8 vs rows 9-44 of RMR; presumably athletes vs non-athletes
# -- confirm against the data)
y1 <- RMR$rmr[1:8]
y2 <- RMR$rmr[9:44]
z <- two.sample.t(y1, y2)

# R built-in function
t.test(y1, y2, var.equal = TRUE)