##################################################################### # Introduction to R # Math 242 ##################################################################### # Run each line in this file. # Try to understand what each line does. # If you aren't sure what something does, ask about it or look it up. ##### Things to note ##### # The symbol # indicates that everything following on that line is a comment. # Functions arguments are in parentheses. # Control structure sytnax is different than in Mathematica. # The question mark opens help files, like this: ?exp ##### Basic arithmetic and numerical operations #### 2+3 10^4 sin(pi/3) exp(2) log(3) ##### Assignment to variables ##### x <- 2 # this is the preferred assignment operator in R (keyboard shortcut: Alt+- or Option+-) x x = 2 # this is also an assignment operator x 2 -> x # this is yet another assignment operator x x <- y <- 10 # you can chain assignments together x y ##### Vectors of values are very important in R ##### # these are the same: 1:10 seq(1,10) # so are these: 2*(1:5) (1:5) + (1:5) seq(1, 10, by=2) seq(1, 10, 2) # seq is very general ?seq seq(1, 10, by=.01) seq(1, 10, length.out=4) # rep for replicate or repeat: rep(3,10) rep(c(1,5), 4) rep(c(1,5), each=4) rep(seq(1,10), 4) rep(seq(1,10), each=4) ##### Operations on vectors are encouraged ##### s<-(1:10) # all of the following are OK in R: s^2 s/10 s + 100 s + s s <= 5 s %% 3 s[s<=5] sin(s) sum(s) ##### Here are some ways to create matrices ##### m <- matrix(0, nrow=4, ncol=2) m m <- matrix(1:8, nrow=4, ncol=2) m m <- matrix(1:8, byrow=T, nrow=4, ncol=2) m m[1,2] <- 100 m ##### Matrix operations are supported ##### b <- c(1,2) b <- matrix(c(1,2), nrow=2, ncol=1) m %*% b # What does this do? ##### Multi-dimensional arrays are allowed #### aa <- array(1:8, dim=c(2,2,2)) aa[1,2,1] aa ##### Here are two ways to write loops in R ##### tot <- 0 for(i in 1:10){ print(c(i, tot)) tot <- tot + i } tot tot <- 0 i <- 1 while(i <= 10){ tot <- tot + i i <- i + 1 } tot ##### Here is an if statement ##### i <- 9 if(i < 10){ print("big") }else{ print("not so big") } ##### You can write functions like this ##### # As in Mathematica, the last line of the function will be the return value. f <- function(x){ 2*x + 1 } f(1) x <- seq(0, 1, .1) f(x) ##### Variables inside functions can be local or global ##### y <- 1000 f <- function(x){ y <- x + 2 # LOCAL variable y y^2 } f(1) y y <- 1000 f <- function(x){ y <<- x + 2 # GLOBAL variable y y^2 } f(1) y ##### Here is a function that adds up the first n numbers ##### ##### that are divisible by k ##### sumToN <- function(n,k){ tot <- 0 for(i in 1:n){ if(i %% k == 0) tot <- tot + i } tot } sumToN(10,1) sumToN(10,2) sumToN(1000,2) sumToN(1000000,2) sumToN(2000000,3) sumToN(1000000000,5) #bad idea...hit the stop sign ##### Here is a faster way to do the same thing ##### n <- 100 k <- 4 s <- k*seq(1,n/k) sum(as.numeric(s)) # put it in a function: sumToN2 <- function(n,k){ sum(as.numeric(k*seq(1,n/k))) } sumToN2(100,4) # compare the runtimes of our two functions: m <- 1000000 k <- 4 system.time(s1 <- sumToN(m,k)) system.time(s2 <- sumToN2(m,k)) c(s1,s2) ##### Fibonacci numbers ##### # Compute them recursively: # fib(0) = 0 # fib(1) = 1 # fib(n) = fib(n-1) + fib(n-2) fib <- function(n){ # # write a function here # that will recursively # compute the Fibonacci numbers # } fib(3) fib(5) fib(10) # Compute them iteratively: # initialize a<-0, b<-1, c<-1 # loop some number of times: # a<-b # b<-c # c<-a+b # return c fib2 <- function(n){ # # write a function here # that will iteratively # compute the Fibonacci numbers # } fib2(15) fib2(50) fib2(100) # be careful...numerical values in R have limited precision ##### Sieve of Eratosthenes ##### # here is one implementation: sieveOfEros <- function(n){ maxVal <- sqrt(n) boolVals <- rep(T,n) for(i in 2:maxVal){ for(j in seq(i^2,n,by=i)){ boolVals[j] <- F } } primes <- (1:n)[boolVals] primes[-1] # what does this do? } sieveOfEros(100) primes <- sieveOfEros(1000) primes m <- 100000 system.time(primes <- sieveOfEros(m)) pN <- length(primes) primes[(pN-10):pN] # here is a faster implementation: sieveOfEros2 <- function(n){ currPrime <- 2 maxVal <- floor(sqrt(n)) allValues <- 3:n retList <- c(currPrime) while(currPrime <= maxVal){ allValues <- allValues[allValues %% currPrime != 0] currPrime <- allValues[1] retList <- c(retList,currPrime) allValues <- allValues[-1] } c(retList,allValues) } m <- 10000 primes <- sieveOfEros2(m) # compare runtimes m <- 1000000 system.time(primes <- sieveOfEros(m)) system.time(primes <- sieveOfEros2(m)) # here is yet another implementation: sieveOfEros3 <- function(n){ currPrime <- 2 maxVal <- floor(sqrt(n)) allValues <- 3:n retList <- c(currPrime) while(currPrime <= maxVal){ multVals <- currPrime*(1:(n/currPrime)) allValues <- setdiff(allValues,multVals) currPrime <- allValues[1] retList <- c(retList,currPrime) allValues <- allValues[-1] } c(retList,allValues) } m <- 1000 primes <- sieveOfEros(m) primes3 <- sieveOfEros3(m) primes == primes3 setdiff(primes,primes3) # compare runtimes m<-1000000 system.time(primes<-sieveOfEros(m)) system.time(primes<-sieveOfEros2(m)) system.time(primes<-sieveOfEros3(m)) ##### Basic plotting in R ##### xvals <- seq(0,pi,.01) yvals <- sin(xvals) plot(yvals) plot(xvals,yvals) plot(xvals,yvals,type="l") ##### Use ggplot for better plotting capabilities ##### # first load the package library(ggplot2) # NOTE: If the package didn't load, you probably need to install it. # Fortunately, RStudio makes this easy, via the package manager at right. # a basic plot ggplot(data=NULL, aes(x=xvals,yvals)) + geom_line(color="red") + ggtitle("Here is a plot") # ggplot likes data frames dat.df <- data.frame(x=xvals,y=yvals) ggplot(dat.df, aes(x,y)) + geom_line(color="red") + ggtitle("Here is a plot") ##### We will use R to simulate various random phenomena. ##### ##### For now, we will create random walks. ##### nSteps <- 1000 # a random walk that goes up or down with equal probability y0 <- 0 yList <- c() for(i in 2:nSteps){ step <- sample(c(-1,1),1) # go up or down y0 <- y0 + step yList <- c(yList,y0) } plot(yList,type="l") # a slightly different implementation y0 <- 0 yList <- rep(0,nSteps) for(i in 2:nSteps){ step <- sample(c(-1,1),1) y0 <- y0 + step yList[i] <- y0 } plot(yList,type="l") # now for a 2-D random walk # this random walk goes up, down, left, and right with equal probability pt0 <- c(0,0) ptList <- matrix(0, nrow=nSteps, ncol=2) steps <- matrix(c(1,0,0,1,-1,0,0,-1), byrow=T, nrow=4, ncol=2) ptList # list is initially all zeros steps # four possible directions to move for(i in 2:nSteps){ step <- sample(1:4,1) pt0 <- pt0 + steps[step,] ptList[i,] <- pt0 } ptList # now this contains the random walk # try to plot the ranom walk dat.df <- data.frame(x=ptList[,1], y=ptList[,2]) ggplot(dat.df) + geom_line(aes(x,y)) # better to use geom_segment dat.df <- data.frame(x0=ptList[1:(nSteps-1),1], x1=ptList[2:nSteps,1], y0=ptList[1:(nSteps-1),2], y1=ptList[2:nSteps,2]) ggplot(dat.df) + geom_segment(aes(x=x0,xend=x1,y=y0,yend=y1)) # now write a function to generate a random walk buildRandomWalk <- function(nSteps){ pt0 <- c(0,0) ptList <- matrix(0, nrow=nSteps, ncol=2) steps <- matrix(c(1,0,0,1,-1,0,0,-1), byrow=T, nrow=4, ncol=2) for(i in 2:nSteps){ step <- sample(1:4,1) pt0 <- pt0 + steps[step,] ptList[i,] <- pt0 } ptList } # ...and a function to plot the random walk plotRandomWalk <- function(ptList){ nSteps <- nrow(ptList) dat.df <- data.frame(x0=ptList[1:(nSteps-1),1], x1=ptList[2:nSteps,1], y0=ptList[1:(nSteps-1),2], y1=ptList[2:nSteps,2]) ggplot(dat.df) + geom_segment(aes(x=x0,xend=x1,y=y0,yend=y1)) } # try it out pts <- buildRandomWalk(100) pts dim(pts) plotRandomWalk(pts) pts<-buildRandomWalk(10000) plotRandomWalk(pts) # now, get some data about your random walk # How wide is the random walk? max(pts[,1]) - min(pts[,1]) # How tall is the random walk? max(pts[,2]) - min(pts[,2]) # The "diameter" of the walk is the larger of the previous two. diamRandowWalk <- function(pts){ max( max(pts[,1]) - min(pts[,1]), max(pts[,2]) - min(pts[,2]) ) } diamRandowWalk(pts) # make a new random walk and get its diameter m <- 10000 pts <- buildRandomWalk(m) diamRandowWalk(pts) # make a bunch of random walks and find their average diameter aveDiam <- function(m,K=100){ vals <- rep(0,K) for(k in 1:K) vals[k] <- diamRandowWalk(buildRandomWalk(m)) mean(vals) } aveDiam(100) aveDiam(1000) aveDiam(10000) aveDiam(100000) ##### Exercise: ##### ##### Plot the aveDiam as a function of m. ##### ##### Make a conjecture for the average growth rate. #####