## Code used in session 1 (from the slides)
## Please go through this code either while going through the slides or at the end of the session.
## To run the code, open this script file in R (File > Open script...), select the code you want to run (like selecting text) and press "CTRL + R".

## Simple arithmetic (each result is printed in the console):
2 * 2 
1897985645567.98 * 3465.96873 
log(10)     # natural logarithm (base e)
log10(10)   # logarithm to base 10

## However, try this:
a <- sqrt(2)
a * a == 2
# What happened?
# The comparison prints FALSE: sqrt(2) cannot be stored exactly as a floating-point number,
# so a * a differs from 2 by a tiny rounding error.
# Now try:
(b <- a*a)   # the outer parentheses make R print the value that was just assigned
# Is R stupid? 
# No - b is displayed as 2 only because R rounds what it prints (7 significant digits by default).
# To compare numbers with a tolerance, use  isTRUE(all.equal(a * a, 2))  instead of  == .

## To see a list and brief descriptions of built-in R datasets: 
data()   # called without arguments, data() lists the available datasets

## To load the dataset called "iris":
data(iris) 

## To see the whole dataset on the screen (to print the dataset):
iris   # typing the name of an object prints it: here the whole dataset (dataframe) 

## To explore the dataframe:
dim(iris)        # number of rows and columns
names(iris)      # column (variable) names
colnames(iris)   # column names again (same result as names() for a dataframe)
rownames(iris)   # row names
head(iris)       # first rows of the dataframe
tail(iris)       # last rows of the dataframe
str(iris)        # compact overview of the structure: type and first values of each column

## R is case-sensitive: Str() is not the same as str(), so the call below fails with
## 'could not find function "Str"'. It is wrapped in try() so that the error message is
## still shown, but the rest of the script keeps running when the whole file is run in one go.
try(Str(iris))   # Will not work due to case-sensitivity of R. Should have been " str(iris) "

## Some basic analyses and graphics on iris dataframe: 

summary(iris)      # summary metrics for each column (variable) 
boxplot(iris)      # boxplot of each column (variable)
boxplot(iris[-5])  # boxplot of each column (variable) except the fifth column; equivalent to iris[1:4]
boxplot(iris[-5], col = "red")    # adding color
abline(h = 3.5, col = "red")      # adding a horizontal line (at y = 3.5) to the plot that is currently open
boxplot(iris$Sepal.Length ~ iris$Species)    # boxplot of Sepal Length data for each species (n=3) separately 
hist(iris$Sepal.Length)           # histogram of Sepal Length (all species) 
pairs(iris[-5])    # pairwise scatterplot matrix for the variables 1 to 4 in the dataframe
my_cols <- c("red", "blue", "green") # this line creates a vector of three colours (one per species) to be used in the next line
# my_cols[iris$Species] picks one colour for every row according to its species (a factor indexes by its level number)
pairs(iris[1:4], pch = 19, cex = 0.5, col =   my_cols[iris$Species], lower.panel=NULL)    # the same scatterplot matrix (of columns 1:4) with some formatting and excluding the lower panel

## A more sophisticated version of the correlation scatterplot matrix using the R package "psych"
## If not already installed, type: install.packages("psych") 

library("psych")     # loads the package "psych" (the line fails if the package is not installed)
pairs.panels(iris)   # called on the whole dataframe (all five columns)
pairs.panels(iris, method = "spearman")   # the same plot, requesting the "spearman" correlation method

## R uses an aRrow   <-   as an assignment operator:
x <- 5  # x is assigned a value of 5
x * 6   # x is multiplied by 6; R prints the result:
# [1] 30

## Another example:
df <- iris       # assigning the dataframe "iris" to a variable df (df now holds a copy of iris)
boxplot(df[-5])  # boxplot of df excluding the fifth column; equivalent to df[1:4] 

## You can split a command line to as many pieces as you like with no harm
## (R keeps reading the following lines until the opening parenthesis is closed):
boxplot(iris[1:4], 
   boxwex = .2,     # you can even insert a comment in the middle of a command (at the end of a line, before the next option on the next line)
   pch =       14)

## The above script is equivalent to:
boxplot(iris[1:4], boxwex = .2, pch = 14)

## You can also join multiple commands from different lines into a single line, separated by semicolons: 
data(iris); boxplot(iris[-5]); abline(h=2.5)

## The above script is equivalent to:
data(iris)
boxplot(iris[-5])
abline(h = 2.5)

## How to enter data: 
## For a small dataset (vector), create an R object using  c():
x <- c(2, 4, 3, 5, 1, 4, 7, 2, 5, 3, 8, 5, 6, 9, 2)   # note: this replaces any existing object called x
boxplot(x)

## For a matrix or a data frame, create multiple vectors for rows or columns, and bind them using  rbind() / cbind()  (which give a matrix) or the  data.frame()  function.
row1 <- c(2, 4, 6, 8)
row2 <- c(3, 6, 9, 12)
matrix1 <- rbind(row1, row2)    # creates matrix1 with two rows and four columns; the rows are named after the vectors (row1, row2)
matrix1   # this will print the newly created matrix on the screen 

## For a matrix, use the  matrix() function. The values are filled in row by row
## (byrow = TRUE) or column by column (byrow = FALSE, the default).
## The outer parentheses make R print each matrix as it is assigned, so that the
## difference between the two versions can actually be seen:
(x <- matrix(c(2, 12, 6, 10), nrow = 2, byrow = TRUE))    # rows are: 2 12 / 6 10
(x <- matrix(c(2, 12, 6, 10), nrow = 2, byrow = FALSE))   # rows are: 2 6 / 12 10
matrix2 <- matrix(c(5, 10, 15, 20, 6, 12, 18, 24), nrow = 2, byrow = TRUE)   # creates matrix2 with two rows and four columns
matrix2   # this will print the newly created matrix on the screen

## For an array, use the array() function (this needs matrix1 and matrix2, created earlier in this script): 
      array1 <- array(c(matrix1, matrix2), dim=c(2,4,2))    # creates a new array (named array1) with two matrices of two rows and four columns each
      array1     # prints array1 as two separate matrices 

## The two matrices (of 2 rows and 4 columns each) are combined into one array holding two 2x4 tables (hence, dim = c(2,4,2)) 

## To create a 2x2 (contingency) table, create a matrix
## (filled column by column: first column 22, 46; second column 66, 58):

x <- matrix(c(22, 46, 66, 58), nrow = 2)

## R can also provide a blank spreadsheet to enter the numbers.
## The data editor needs an interactive R session, so this step is skipped when the
## script is run non-interactively (x then keeps the 2x2 matrix created above):

if (interactive()) {
  x <- data.frame()  # starts from an empty dataframe called x
  fix(x)             # opens the data editor to enter the cell values (r x c); the edited table is saved back into x
}

## To see the newly created contingency table:

x   # prints the newly created contingency table

## To use the contingency table, for example, for Fisher's test, use the assigned name of the 2x2 table (x):

fisher.test(x)

## CSV files:
## Save your Excel file as CSV in the working directory (if in doubt, check with:  getwd() ) 

## Use the read.csv() function to read your file into R (ideally, assign it to a name)
## Change "filename.csv" to the name of your own file, which must be saved in the working directory as a CSV file
## (the line below fails if no file with that name is found):

file <- read.csv("filename.csv", header = TRUE)   # header = TRUE: the first line of the file holds the column names

## Explore your imported CSV file (which is in the memory with the name "file")

dim(file) 
str(file) 
names(file)
head(file)
tail(file)
summary(file)

## You can also export (save) an R dataframe (like iris) as a CSV file (by default to the working directory).
## row.names = FALSE stops write.csv() from writing the row numbers as an extra first column,
## which would otherwise come back as an unwanted column called "X" when the file is read again:
write.csv(iris, "iris.csv", row.names = FALSE)
## To import it as a CSV file:
iris_csv <- read.csv("iris.csv")

## XLSX files:
## Make sure your Excel file is in the working directory
## (if the package is not already installed, type: install.packages("readxl") ):
library("readxl")    # loads the package "readxl" 
file <- read_excel("filename.xlsx")    # reads your Excel file into R and assigns the name "file" 
file   # prints your file on the screen, or explore it using dim(), head(), tail() etc.  

## You can also read SPSS, SAS or Stata files into R if needed

## For the rest of the day, create a folder R in C drive and then set the default (working) directory to C:/R for the current session:
  setwd("C:/R")     # the change is only valid for the current session; gives an error if the folder does not exist yet
## Check that your working directory is changed to "C:/R" :
  getwd()

## file.choose() lets you pick a file interactively from anywhere on the computer:
   x <- read.csv(file.choose(), header=TRUE)    # if no header, use FALSE   
   dim(x)  # to check the dimensions of the imported CSV file 

#####################################