## Codes used in session 1 (from the slides)
## Please go through these codes either while going through the slides or at
## the end of the session
## To run the code, open this script file on R (File > Open script..), and
## select the code you want to run (like selecting text) and press "CTRL + R".

## Simple arithmetic:
2 * 2
1897985645567.98 * 3465.96873
log(10)
log10(10)

## However, try this:
a <- sqrt(2)
a * a == 2 # What happened?
# Now try:
(b <- a * a) # Is R stupid?
## No: sqrt(2) is stored as a finite-precision double, so a * a is only
## approximately 2 (it merely prints as 2). Compare doubles with a tolerance
## instead of `==`:
isTRUE(all.equal(a * a, 2))

## To see a list and brief descriptions of built-in R datasets:
data()

## To load the dataset called "iris":
data(iris)

## To see the whole dataset on the screen (to print the dataset):
iris # this will print the whole dataset (dataframe) on the screen

## To explore the dataframe:
dim(iris)
names(iris)
colnames(iris)
rownames(iris)
head(iris)
tail(iris)
str(iris)
## Will not work due to case-sensitivity of R. Should have been " str(iris) ".
## Wrapped in try() so the error message is still shown, but running the whole
## script at once does not stop here.
try(Str(iris))

## Some basic analyses and graphics on iris dataframe:
summary(iris) # summary metrics for each column (variable)
boxplot(iris) # boxplot of each column (variable)
# boxplot of each column (variable) except the fifth column;
# equivalent to iris[1:4]
boxplot(iris[-5])
boxplot(iris[-5], col = "red") # adding color
abline(h = 3.5, col = "red") # adding a horizontal line
# boxplot of Sepal Length data for each species (n=3) separately
boxplot(iris$Sepal.Length ~ iris$Species)
hist(iris$Sepal.Length) # histogram of Sepal Length (all species)
# pairwise correlation scatterplot matrix for the variables 1 to 4 in the
# dataframe
pairs(iris[-5])
# this line creates a set of colour to be used in the next line (just carry on)
my_cols <- c("red", "blue", "green")
# the same scatterplot matrix (of columns 1:4) with some formatting and
# excluding the lower panel
pairs(
  iris[1:4],
  pch = 19,
  cex = 0.5,
  col = my_cols[iris$Species],
  lower.panel = NULL
)

## A more sophisticated version of the correlation scatterplot matrix using
## the R package "psych"
## If not already installed, type:
## Install "psych" only when it is missing, so re-running this script does not
## download and reinstall the package every time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library("psych")
pairs.panels(iris)
pairs.panels(iris, method = "spearman")

## R uses an aRrow <- as an assignment operator:
x <- 5 # x is assigned a value of 5
x * 6 # x is multiplied by 6; result is 30

## Another example:
df <- iris # assigning the dataframe "iris" to a variable df
# boxplot of df excluding the fifth column; equivalent to df[1:4]
boxplot(df[-5])

## You can split a command line to as many pieces as you like with no harm:
boxplot(iris[1:4],
  boxwex = 0.2, # you can even insert a comment in the middle of a command
  # (at the end of a line before the next option in the next line)
  pch = 14
)

## The above script is equivalent to:
boxplot(iris[1:4], boxwex = 0.2, pch = 14)

## You can also join multiple commands in different lines to a single line
## command separated by semicolons:
data(iris); boxplot(iris[-5]); abline(h = 2.5)

## The above script is equivalent to:
data(iris)
boxplot(iris[-5])
abline(h = 2.5)

## How to enter data:
## For a small dataset (vector), create an R object using c():
x <- c(2, 4, 3, 5, 1, 4, 7, 2, 5, 3, 8, 5, 6, 9, 2)
boxplot(x)

## For a data frame, create multiple vectors for rows or columns, and bind
## them using rbind() / cbind() or data.frame() functions.
row1 <- c(2, 4, 6, 8)
row2 <- c(3, 6, 9, 12)
matrix1 <- rbind(row1, row2) # creates matrix1 with two rows and four columns
matrix1 # this will print the newly created matrix on the screen

## For a matrix, use the matrix() function:
x <- matrix(c(2, 12, 6, 10), nrow = 2, byrow = TRUE)
x # filled row by row
# or try the next one to see the difference
x <- matrix(c(2, 12, 6, 10), nrow = 2, byrow = FALSE)
x # filled column by column
# creates matrix2 with two rows and four columns
matrix2 <- matrix(c(5, 10, 15, 20, 6, 12, 18, 24), nrow = 2, byrow = TRUE)

## For an array, use the array() function:
# creates a new array (named array1) with two matrices of two rows and four
# columns each
array1 <- array(c(matrix1, matrix2), dim = c(2, 4, 2))
array1 # prints array1 as two separate matrices
## multiple matrices (of 2 rows and 4 columns) are merged to create an array:
## two 2x4 tables (hence, dim = c(2,4,2))

## To create a 2x2 (contingency) table, create a matrix:
x <- matrix(c(22, 46, 66, 58), nrow = 2)

## R can also provide a blank spreadsheet to enter the numbers:
x <- data.frame() # assigns a name to the contingency table to be created
fix(x) # opens the data editor to enter the cell values (rxc)

## To see the newly created contingency table:
x # prints the newly created contingency table

## To use the contingency table, for example, for Fisher's test, use the
## assigned name of the 2x2 table (x):
fisher.test(x)

## CSV files:
## Save your Excel file as CSV in the working directory (if in doubt, check
## with: getwd() )
## Use the read.csv() function to read your file into R (ideally, assign it to
## a name)
## Change filename to the name of your file which is saved in working
## directory as a CSV file:
file <- read.csv("filename.csv", header = TRUE)

## Explore your imported CSV file (which is in the memory with the name "file")
dim(file)
str(file)
names(file)
head(file)
tail(file)
summary(file)

## You can also export (save) an R dataframe (like iris) as a CSV file (by
## default to the working directory):
## row.names = FALSE keeps the row numbers out of the file; otherwise they come
## back as an extra column named "X" when the file is read again.
write.csv(iris, "iris.csv", row.names = FALSE)

## To import it as a
## CSV file:
iris_csv <- read.csv("iris.csv")

## XLSX files:
## Make sure your Excel file is in the working directory:
library("readxl") # loads the package "readxl"
# reads your Excel file into R and assigns the name "file"
file <- read_excel("filename.xlsx")
# prints your file on the screen, or explore it using dim(), head(), tail() etc.
file

## You can also read SPSS, SAS or Stata files into R if needed

## For the rest of the day, create a folder R in C drive and then set the
## default (working) directory to C:/R for the current session:
setwd("C:/R") # the change is only valid for the current session

## Check that your working directory is changed to "C:/R" :
getwd()

## file.choose() to select a file from anywhere on the computer:
x <- read.csv(file.choose(), header = TRUE) # if no header, use FALSE
dim(x) # to check the dimensions of the imported CSV file

#####################################