## Codes used in session 3 (from the slides)
## To run the code, open this script file on R (File > Open script..), and select the code you want to run (like selecting text) and press "CTRL + R".
## Generating series of numbers (vectors) of different kinds
seq(1:100) # To have sequential numbers from 1 to 100 in a vector
seq(from=1, to=3, by=0.1) # To have a series of sequential numbers starting from 1 to 3
and increasing by 0.1
runif(4) # To generate four random numbers within the default range
of 0 to 1
runif(10, min=1, max=100) # To generate 10 random numbers within the range 1 to 100
rnorm(n, mean = 0, sd = 1) # To generate "n" number of random numbers fitting to
normal distrbution with default mean=0 and sd=1
rnorm(25, 30, 5) # To generate a sample of size 25 from a normal
distribution with mean 30 and standard deviation 5
a <- rbinom(100, 10, 0.5) # 100 people tossing a fair coin (third argument=0.5) 10
times
# mean(a) should be close to 10*0.5 = 5
c(1,5,3,8) # to create a vector of these numbers
# Generate a dataset/dataframe for a case-control study:
cc <-
data.frame(case.id = 1:100,
age = rnorm(100, mean = 60, sd = 12),
caco = gl(2, 50, labels = c("Case", "Control"))) # All of this command can be written in a single line
# gl() function generates facors with levels
summary(cc) # Summary descriptive statistics of the newly generated dataset
# Generate a dataset with multiple random variables:
set.seed(955) # set.seed() makes sure that the simulated dataset will be the same everytime
vvar <- 1:20 + rnorm(20,sd=3)
wvar <- 1:20 + rnorm(20,sd=5)
xvar <- 20:1 + rnorm(20,sd=3)
yvar <- (1:20)/2 + rnorm(20, sd=10)
zvar <- rnorm(20, sd=6)
data <- data.frame(vvar, wvar, xvar, yvar, zvar)
head(data)
# Create a 2x2 Contingency Table:
x <- matrix(c(22, 46, 66, 58), nrow = 2) # To create a 2x2 table
# Alternatively, use the data editor:
x <- data.frame() # Assign a name to the contingency table to be created
fix(x) # Opens the data editor to enter the cell values (rxc)
X # Prints the newly created contingency table
## Factors as grouping variables (with levels)
data(iris)
str(iris) # Shows "Species" as a factor. If it wasn't a factor,
but we need it to be a factor for analysis purposes,
we would use the following command:
iris$Species <- as.factor(iris$Species)
str(iris) # Would now show as a factor (but it aleready is)
data(infert)
str(infert) # showing "case" as numerical variable
infert$case <- as.factor(infert$case) # changing "case" variable from class numerical to class factor
str(infert) # showing "case" as factor
class(infert$case) # checking the class of case variable
## apply()
data(iris)
apply(iris[-5], 2, mean) # "2" as the second argument denotes columns
## tapply()
data(iris)
tapply(iris$Petal.Length, iris$Species, mean)
## aggregate()
data(iris)
aggregate(.~Species, iris, var) # var = variance
## with()
data(infert)
with(infert, t.test(infert$parity ~ infert$case)) # parity is a count variable and case is the case-control indicator (grouping factor)
## by()
data(iris)
by(iris$Sepal.Length, iris$Species, summary) # summary statistics of Sepal.Length is requested for each Species
## Generating two (2x2) contingency tables (ct1 and ct2):
ct1 <- matrix(c(43,28,42,58), nrow=2)
ct2 <- matrix(c(53,38,52,78), nrow=2)
# Pearson's Chi-squared test:
chisq.test(ct1)
chisq.test(ct2)
# Fisher's exact test:
fisher.test(ct1)
fisher.test(ct2)
## Cochrane-Mantel-Haenszel test for combined analysis of multiple contingency tables with weights:
# First, put together multiple contingency tables (in this case, ct1 and ct2) in an array:
array1 <- array(c(ct1, ct2), dim=c(2,2,2)) # " dim = c(2,2,2) " denotes two 2x2 tables
# install the package "lawstat" if needed: install.packages("lawstat")
library("lawstat")
cmh.test(array1)
## Dose-response (trend) test using base R:
# Requires data from at least 3 groups to check linear change (or trend) in the effect size
# Generate a 5x2 table as a matrix:
matrix1 <- matrix(c(35, 82, 250, 293, 196, 190, 136, 71, 32, 13), nrow=5, byrow=TRUE)
colnames(matrix1) <- c("cases", "controls")
rownames(matrix1) <- c("0", "1", "2", "3", "4+")
matrix1 # to inspect the matrix with row and column names included
# We will need the row sums for this test:
sums <- rowSums(matrix1)
# We need to run prop.trend.test() with the arguments being x (first column numbers) and n (row totals)
prop.trend.test(x = c(35, 250, 196, 136, 32), n= sums)
# Another example for prop.trend.test() for a new 4x2 table:
prop.trend.test(x = c(47, 31, 9, 5), n = c(49, 40, 18, 10))
## Dose-response (trend) test using the R package "DescTools":
# https://www.rdocumentation.org/packages/DescTools/versions/0.99.19/topics/MHChisqTest
# A rxc (4x4) table for Job Satisfaction (from Agresti 2002, p.57):
job <- matrix( c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4,
dimnames = list(income = c("< 15k", "15-25k", "25-40k", "> 40k"),
satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS")) )
job # to inspect the 4x4 table
MHChisqTest(job, srow=c(7.5,20,32.5,60)) #runs the MH Chi-squared test for trend (degrees of freedom = 1)
# Another example for MHChisqTest() using the same matrix (matrix1) created above:
matrix1 <- matrix(c(35, 82, 250, 293, 196, 190, 136, 71, 32, 13), nrow=5, byrow=TRUE)
colnames(matrix1) <- c("cases", "controls")
rownames(matrix1) <- c("0", "1", "2", "3", "4+")
matrix1 # to inspect the matrix with row and column names included
MHChisqTest(matrix1) # which yields a very similar result to that of prop.trend.test() for matrix1 above
## Chi-squared test as goodness-of-fit test (using Mendel's results)
chisq.test(c(26, 31, 26, 27), p = c(0.25, 0.25, 0.25, 0.25)) # First four numbers are the observed counts; 0.25 is the expected proportion for all four counts
## McNemar test for matched data analysis:
# Assuming data given in ct1 is from a matched design (like controls are matched to the cases by age and gender):
mcnemar.test(ct1)
## Z-test for proportion (like Chi-squared or Fisher's exact test):
prop.test(76, 100, 0.50) # One-sample Z-test; 76 is the observed count out of the sample size of 100, and 0.50 is the expected proportion
prop.test(c(76, 50), c(100, 100)) # The proportions compared are 76/100 and 50/100
# If continuity correction is not needed, correct = FALSE argument can be added
### For a complete analysis of a 2x2 table, run the script "contingency.R" ###