# Scores of the same students on two assignments. Position i in both vectors
# refers to the same student; the paired test further down relies on that.

# Results of the first assignment (baseline), all students.
firstAssignmentAll <- c(
  8, 14.5, 9, 13.5, 16, 10.5, 13.5, 15.5, 12.5, 13.5,
  8, 8, 8, 11, 11.5, 11, 12, 12.5, 11.5, 11.5,
  11, 11.5, 8, 11.5, 12, 7, 13, 12, 8.5, 10.5,
  14, 15, 11.5, 8, 12.5
)
summary(firstAssignmentAll)

# Results of the second assignment (expected improvement) <- YOUR SW
secondAssignmentAll <- c(
  14, 17, 11.5, 14, 14.5, 8, 14.5, 16, 15.5, 0,
  14, 0, 11.5, 10.5, 14, 13.5, 17, 14, 14, 10,
  13.5, 16.5, 12.5, 14.5, 9, 14, 17.5, 13.5, 14, 14,
  15, 13.5, 9.5, 0, 15
)
summary(secondAssignmentAll)

#----------------- same distributions without outliers (students with 0 scores)
# The outlier-free vectors are derived from the full ones with a single mask,
# so a student is always dropped from BOTH assignments and the pairing cannot
# drift out of sync (which is easy to get wrong when editing two lists by hand).
stopifnot(length(firstAssignmentAll) == length(secondAssignmentAll))
hasScore <- firstAssignmentAll != 0 & secondAssignmentAll != 0

# Results of the first assignment (baseline), students with a 0 score removed.
firstAssignment <- firstAssignmentAll[hasScore]
summary(firstAssignment)

# Results of the second assignment, students with a 0 score removed.
secondAssignment <- secondAssignmentAll[hasScore]
summary(secondAssignment)
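# Directional hypothesis (checked for significance further down):
# H1: scores of the 2nd assignment > scores of the 1st assignment ---> accepted
# H0: scores of the 2nd assignment = scores of the 1st assignment ---> rejected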
# Overlaid histograms of the first and second assignment scores.
# Both fills are semi-transparent (alpha = 0.5) so that the bars of one
# histogram stay visible where the two overlap.
histFillFirst <- rgb(1, 0, 0, 0.5)
histFillSecond <- rgb(0, 0, 1, 0.5)

# Histogram of the first assignment's results (this call creates the plot).
hist(firstAssignment,
  main = "Histogram of 1st assignments score - average of class marks",
  xlab = "scores",
  border = "blue",
  col = histFillFirst,
  xlim = c(0, 20),
  las = 1,
  breaks = 28
)

# Histogram of the second assignment's results, drawn on top of the first.
# `add = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, `TRUE` is a reserved word.
hist(secondAssignment,
  xlab = "scores",
  border = "blue",
  col = histFillSecond,
  xlim = c(0, 20),
  las = 1,
  breaks = 28,
  add = TRUE
)

# The legend is drawn last so the second histogram cannot paint over it.
legend("topright", c("First", "Second"), fill = c(histFillFirst, histFillSecond))
# Side-by-side box-plots of the two score vectors.
# The groups are passed as one named list; the list names become the
# labels shown under each box.
scoreGroups <- list(first = firstAssignment, second = secondAssignment)
boxplot(
  scoreGroups,
  ylab = "scores",
  main = "1st vs 2nd assignment scores"
)
# Overlaid kernel density estimates of the first and second assignment scores.

# First assignment: plot() sets up the axes, polygon() fills the area under
# the curve with a semi-transparent red.
d1 <- density(firstAssignment)
plot(d1,
  main = "Density plot of 1st vs 2nd assignment scores",
  ylim = c(0, 0.40), xlim = c(0, 20),
  xlab = "scores", # the x-axis holds score values, not students
  col = rgb(1, 0, 0, 0.5)
)
polygon(d1, col = rgb(1, 0, 0, 0.5), border = rgb(1, 0, 0, 0.5)) # color it inside

# Second assignment, added to the existing plot. lines() always draws onto the
# current plot; `add`, `xlim`, `ylim` and `xlab` are not graphical parameters
# and only produced warnings, so they are no longer passed.
d2 <- density(secondAssignment)
lines(d2)
polygon(d2, col = rgb(0, 0, 1, 0.5), border = rgb(0, 0, 1, 0.5))

# The legend is drawn last so the second density cannot paint over it.
legend("topright", c("First", "Second"), fill = c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)))
# overlap -> significance level: [[alpha = 0.05]] is the one used below
# (other levels listed: alpha = 0.01, alpha = 0.1, alpha = 0.001)

# Comparison of distributions (t-test)
# Assumption "paired": yes (dependent samples, same students in both vectors).

# Normality check of the first assignment scores (Shapiro-Wilk).
shapiro.test(firstAssignment)
# RESULTS => From the output, the p-value > 0.05, implying that the distribution
# of the data IS NOT significantly different from a normal distribution.
# In other words, we can assume normality.

# Normality check of the second assignment scores (Shapiro-Wilk).
shapiro.test(secondAssignment)
# RESULTS => From the output, the p-value < 0.05, implying that the distribution
# of the data IS significantly different from a normal distribution.
# In other words, the normality assumption HAS BEEN VIOLATED.

# AS A CONSEQUENCE, since normality is violated for one of the 2 distributions,
# we cannot use the dependent t-test, but rather the Wilcoxon signed-rank test.
# We want to know whether the median score of the first assignment differs from
# the median score of the second assignment.
#
# exact = FALSE: the paired differences contain ties, so an exact p-value
# cannot be computed and wilcox.test() falls back to the normal approximation
# anyway (with a warning). Asking for the approximation explicitly gives the
# same p-value without the warning.
#
# NOTE(review): the hypothesis stated earlier in this script is directional
# (2nd > 1st) while this test is two-sided; a one-sided test would pass
# alternative = "less" here - confirm which one is intended.
res <- wilcox.test(firstAssignment, secondAssignment, paired = TRUE, exact = FALSE)
res$p.value
# The p-value of the test is less than the significance level [[alpha = 0.05]].
# We can conclude that the median score of the first assignment is significantly
# different from the median score of the second assignment,
# with a p-value = 0.0003681562
#-------------------------------------------------
# We would like to check if there is a correlation between the scores of the
# first assignment and those of the second assignment.

# Scatterplot: first assignment on the x-axis, second assignment on the y-axis.
# The axis labels now name the plotted quantities (they previously read
# "age " / "IQ", which do not describe this data).
plot(firstAssignment, secondAssignment,
  main = "Scatterplot first vs second assignment scores",
  xlab = "first assignment scores", ylab = "second assignment scores",
  xlim = c(0, 20), ylim = c(0, 20)
)

# Least-squares line for the plot above. abline() reads the coefficients as
# y = intercept + slope * x, so the model must regress the y-axis variable
# (second assignment) on the x-axis variable (first assignment).
fitline <- lm(secondAssignment ~ firstAssignment)
abline(fitline, col = "blue")

# Spearman rank correlation between the two assignments.
cor(firstAssignment, secondAssignment, use = "complete.obs", method = "spearman") # correlation coefficient=0.4997333 # possible values [-1,1]
#interpretation of corr. coeff. -> https://www.google.com/search?q=rule+of+thumbs+interpretation+correlation&client=firefox-b-d&source=lnms&tbm=isch&sa=X&ved=2ahUKEwie9_SXoNL0AhULCsAKHVqWCXMQ_AUoAXoECAEQAw&biw=805&bih=1151&dpr=0.8#imgrc=Z-sNGC63IxFmVM
#effect size