Я благодарен за ответы, за которые проголосовал: они помогли мне решить эту задачу следующим образом:
# Using the example data.
# NOTE(review): `genes` and `protein` are not defined in this script; they are
# assumed to be data frames whose first column is an identifier and whose
# remaining columns are numeric per-individual measurements -- confirm.

# The two tables are paired row by row below, so they must have the same
# number of rows; otherwise data.frame() could silently recycle the shorter
# column and pair unrelated rows.
# NOTE(review): pairing by position also assumes both tables list the same
# entities in the same order -- confirm, or join on the identifier instead.
stopifnot(nrow(genes) == nrow(protein))

# Average across individuals (every column except the identifier).
# drop = FALSE keeps a data frame even when there is only one sample column,
# so rowMeans() still receives a two-dimensional object.
mRNA_expression <- data.frame(
  genes = genes[, 1],
  Means = rowMeans(genes[, -1, drop = FALSE])
)
Protein_abundance <- data.frame(
  protein = protein[, 1],
  Means = rowMeans(protein[, -1, drop = FALSE])
)

# Pair both sets of means for the correlation plots.
mean_corr <- data.frame(
  mRNA_expression = mRNA_expression[, 2],
  Protein_abundance = Protein_abundance[, 2]
)

# Keep only rows where both means are finite and strictly positive.
# This drops NA rows (as before) and also rows that log10() cannot represent:
# log10(0) is -Inf and log10() of a negative value is NaN, and either would
# make cor() return NA/NaN and make lm() fail further down.
is_loggable <- function(x) is.finite(x) & x > 0
keep <- is_loggable(mean_corr$mRNA_expression) &
  is_loggable(mean_corr$Protein_abundance)
mean_corr <- mean_corr[keep, ]

# Apply log10 to both columns.
mean_corr <- log10(mean_corr)
library(ggplot2)

# Scaffold shared by every plot below: protein abundance on x, mRNA expression
# on y, axis labels, extra spacing between axis titles and the axes, and the
# observations drawn as hollow circles (shape 1).
base_plot <- ggplot(
  mean_corr,
  aes(x = Protein_abundance, y = mRNA_expression)
) +
  labs(x = "Protein abundance (log10)", y = "mRNA expression (log10)") +
  theme(
    axis.title.y = element_text(margin = margin(0, 10, 0, 0)),
    axis.title.x = element_text(margin = margin(10, 0, 0, 0))
  ) +
  geom_point(shape = 1)

# Plain scatter plot, to check the distribution.
base_plot

# Different kinds of plots:

# Scatter plot with a linear regression line and its confidence band
# (shown by default).
base_plot + geom_smooth(method = "lm")

# Scatter plot with the linear regression line only, no confidence band.
base_plot + geom_smooth(method = "lm", se = FALSE)

# Scatter plot with ggplot2's default smoother and its confidence band.
base_plot + geom_smooth()
# Statistics ----

# Correlation matrix of the two log10-transformed columns.
cor(x = mean_corr, method = "pearson")

# Linear regression of protein abundance on mRNA expression.
# NOTE(review): the geom_smooth(method = lm) line drawn in the plots uses
# x = Protein_abundance and y = mRNA_expression, i.e. the opposite direction
# from this model -- confirm which direction is intended.
lm(formula = Protein_abundance ~ mRNA_expression, data = mean_corr)
В качестве отправной точки: `data <- cbind(genes, protein); plot(data)` – AidanGawronski