Regression models
# make function for regression
protein_list = colnames(proteins_dat)[c(2:2912)]
run_limma2 = function(phenotype_col, limma_dat = pheno, plot_prefix = "full",reflevel, nonref){
dat = limma_dat %>%
filter(MS_status=="prevalent") %>%
filter(!is.na(sex_f31_0_0) & !is.na(age_when_attended_assessment_centre_f21003_0_0) & !is.na(.data[[phenotype_col]]))
dat_just_proteins = dat %>% dplyr::select(all_of(protein_list))
dat$agesq = dat$age_when_attended_assessment_centre_f21003_0_0^2
# set row names
rownames(dat_just_proteins) = dat$EID
# mean imputation
dat_just_proteins = dat_just_proteins %>%
mutate_all(~ifelse(is.na(.x),
mean(.x, na.rm = TRUE), .x))
# design
dat[[phenotype_col]] = relevel(
factor(
as.character(dat[[phenotype_col]])
),
ref = reflevel
)
table(dat[[phenotype_col]])
fmla1 = formula(
paste0("~0 + ",phenotype_col," + Batch + age_when_attended_assessment_centre_f21003_0_0 + sex_f31_0_0 + age_when_attended_assessment_centre_f21003_0_0:sex_f31_0_0 + agesq + agesq:sex_f31_0_0")
)
design <- model.matrix(fmla1, dat)
colnames(design) = str_remove_all(colnames(design),phenotype_col)
colnames(design)[ncol((design))-1]="agesex"
colnames(design)[ncol((design))]="agesq_sex"
fit = lmFit(t(dat_just_proteins), design) # fit model
contr_string = paste0("makeContrasts(",
nonref,
" - ",
reflevel,
",levels = design)"
)
message(contr_string)
contr <-
eval(parse(text=contr_string))
tmp <- contrasts.fit(fit, contr) # Estimate contrast for each protein
tmp <- eBayes(tmp) # EB smooth
# process results
pvals = tmp$p.value %>%
data.frame() %>%
mutate(protein = rownames(tmp$p.value))
colnames(pvals) = c("pval","protein")
betas = tmp$coefficients %>%
data.frame() %>%
mutate(protein = rownames(tmp$coefficients))
colnames(betas) = c("beta","protein")
overall_res = betas %>% left_join(pvals,by="protein") %>%
mutate(fdr = p.adjust(pval,method="fdr"),
bonf = p.adjust(pval,method="bonf")
)
short_pheno = str_remove_all(phenotype_col,"_status")
plot_dat = overall_res %>%
mutate(sig = case_when(
bonf < 0.05 & beta > 0 ~ "Up",
bonf < 0.05 & beta < 0 ~ "Down",
bonf >= 0.05 ~ "NS"))
plot_dat %>% arrange(pval)
col_vec = c("blue","grey","red")
names(col_vec) = c("Down","NS","Up")
p = ggplot(plot_dat,
aes(beta,-log10(pval),col = sig))+
geom_point()+
scale_color_manual(values = col_vec)+
theme_minimal()+
ggrepel::geom_text_repel(data = plot_dat %>% filter(bonf < 0.05),
mapping = aes(beta, -log10(pval),label = protein),
show.legend = F,force = 10,min.segment.length = 0,max.overlaps = 10,max.time = 1000,max.iter = 1e9)+
theme(panel.grid = element_blank())+
geom_hline(yintercept = -log10(bonf),linetype="dashed",alpha=0.3)+
geom_vline(xintercept = 0,linetype="dashed",alpha=0.3)+
ggtitle(paste0(phenotype_col,"\nN=",nrow(dat)))+
labs(x="Log fold change",y = "-log10(P)",col="Direction")
p
write_csv(overall_res,paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".csv"))
write_csv(overall_res %>%
filter(bonf < 0.05) %>%
arrange(desc(beta)),paste0("~/ukb_proteomics/outputs/sig_limma_results_",plot_prefix,"_",phenotype_col,".csv"))
plotout = paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".png")
png(plotout,res=900,units="in",width=6,height=4)
print(p)
dev.off()
print(p)
}
run_limma_no_contrast = function(phenotype_col, limma_dat = pheno, plot_prefix = "full"){
dat = limma_dat %>%
filter(MS_status=="prevalent") %>%
filter(!is.na(sex_f31_0_0) &
!is.na(age_when_attended_assessment_centre_f21003_0_0) &
!is.na(body_mass_index_bmi_f21001_2_0) &
!is.na(age_at_ms) &
!is.na(.data[[phenotype_col]]))
dat_just_proteins = dat %>% dplyr::select(all_of(protein_list))
dat$agesq = dat$age_when_attended_assessment_centre_f21003_0_0^2
# set row names
rownames(dat_just_proteins) = dat$EID
# mean imputation
dat_just_proteins = dat_just_proteins %>%
mutate_all(~ifelse(is.na(.x),
mean(.x, na.rm = TRUE), .x))
# rank normalise outcome
hist(dat[[phenotype_col]],main="Raw")
dat[[phenotype_col]] = RNOmni::RankNorm(dat[[phenotype_col]])
hist(dat[[phenotype_col]],main="Normalised")
fmla=formula(paste0("~0 + ",phenotype_col," + body_mass_index_bmi_f21001_2_0 + Batch + age_at_ms + age_when_attended_assessment_centre_f21003_2_0 + age_when_attended_assessment_centre_f21003_0_0 + sex_f31_0_0 + age_when_attended_assessment_centre_f21003_0_0:sex_f31_0_0 + agesq + agesq:sex_f31_0_0"))
design <- model.matrix(fmla, dat)
colnames(design)[ncol((design))-1]="agesex"
colnames(design)[ncol((design))]="agesq_sex"
fit = lmFit(t(dat_just_proteins), design) # fit model
tmp <- eBayes(fit) # EB smooth
# process results
pvals = tmp$p.value %>%
data.frame() %>%
dplyr::select(all_of(phenotype_col)) %>%
mutate(protein = rownames(tmp$p.value))
colnames(pvals) = c("pval","protein")
betas = tmp$coefficients %>%
data.frame() %>%
dplyr::select(all_of(phenotype_col)) %>%
mutate(protein = rownames(tmp$coefficients))
colnames(betas) = c("beta","protein")
overall_res = betas %>% left_join(pvals,by="protein") %>%
mutate(fdr = p.adjust(pval,method="fdr"),
bonf = p.adjust(pval,method="bonf")
)
plot_dat = overall_res %>%
mutate(sig = case_when(
bonf < 0.05 & beta > 0 ~ "Up",
bonf < 0.05 & beta < 0 ~ "Down",
bonf >= 0.05 ~ "NS"))
plot_dat %>% arrange(pval)
col_vec = c("blue","grey","red")
names(col_vec) = c("Down","NS","Up")
p = ggplot(plot_dat,
aes(beta,-log10(pval),col = sig))+
geom_point()+
scale_color_manual(values = col_vec)+
theme_minimal()+
ggrepel::geom_text_repel(data = plot_dat %>% filter(pval < 0.05),
mapping = aes(beta, -log10(pval),label = protein),
show.legend = F,force = 20,min.segment.length = 0,max.overlaps = 10)+
theme(panel.grid = element_blank())+
geom_hline(yintercept = -log10(bonf),linetype="dashed",alpha=0.3)+
geom_vline(xintercept = 0,linetype="dashed",alpha=0.3)+
ggtitle(paste0(phenotype_col,"\nN=",nrow(dat)))+
labs(x="Log fold change",y = "-log10(P)",col="Direction")
p
# note this is nominal P < 0.05
write_csv(overall_res,paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".csv"))
write_csv(overall_res %>%
filter(bonf < 0.05) %>%
arrange(desc(beta)),paste0("~/ukb_proteomics/outputs/sig_limma_results_",plot_prefix,"_",phenotype_col,".csv"))
plotout = paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".png")
png(plotout,res=900,units="in",width=6,height=4)
print(p)
dev.off()
print(p)
}
run_limma_no_contrast_agesex = function(phenotype_col, limma_dat = pheno, plot_prefix = "agesex"){
dat = limma_dat %>%
filter(MS_status=="prevalent") %>%
filter(!is.na(sex_f31_0_0) &
!is.na(.data[[phenotype_col]]))
dat_just_proteins = dat %>% dplyr::select(all_of(protein_list))
# set row names
rownames(dat_just_proteins) = dat$EID
# mean imputation
dat_just_proteins = dat_just_proteins %>%
mutate_all(~ifelse(is.na(.x),
mean(.x, na.rm = TRUE), .x))
# rank normalise outcome
hist(dat[[phenotype_col]],main="Raw")
dat[[phenotype_col]] = RNOmni::RankNorm(dat[[phenotype_col]])
hist(dat[[phenotype_col]],main="Normalised")
fmla=formula(paste0("~0 + ",phenotype_col," + Batch + age_when_attended_assessment_centre_f21003_2_0 + sex_f31_0_0"))
design <- model.matrix(fmla, dat)
fit = lmFit(t(dat_just_proteins), design) # fit model
tmp <- eBayes(fit) # EB smooth
# process results
pvals = tmp$p.value %>%
data.frame() %>%
dplyr::select(all_of(phenotype_col)) %>%
mutate(protein = rownames(tmp$p.value))
colnames(pvals) = c("pval","protein")
betas = tmp$coefficients %>%
data.frame() %>%
dplyr::select(all_of(phenotype_col)) %>%
mutate(protein = rownames(tmp$coefficients))
colnames(betas) = c("beta","protein")
overall_res = betas %>% left_join(pvals,by="protein") %>%
mutate(fdr = p.adjust(pval,method="fdr"),
bonf = p.adjust(pval,method="bonf")
)
plot_dat = overall_res %>%
mutate(sig = case_when(
bonf < 0.05 & beta > 0 ~ "Up",
bonf < 0.05 & beta < 0 ~ "Down",
bonf >= 0.05 ~ "NS"))
plot_dat %>% arrange(pval)
col_vec = c("blue","grey","red")
names(col_vec) = c("Down","NS","Up")
p = ggplot(plot_dat,
aes(beta,-log10(pval),col = sig))+
geom_point()+
scale_color_manual(values = col_vec)+
theme_minimal()+
ggrepel::geom_text_repel(data = plot_dat %>% filter(pval < 0.05),
mapping = aes(beta, -log10(pval),label = protein),
show.legend = F,force = 20,min.segment.length = 0,max.overlaps = 10)+
theme(panel.grid = element_blank())+
geom_hline(yintercept = -log10(bonf),linetype="dashed",alpha=0.3)+
geom_vline(xintercept = 0,linetype="dashed",alpha=0.3)+
ggtitle(paste0(phenotype_col,"\nN=",nrow(dat)))+
labs(x="Log fold change",y = "-log10(P)",col="Direction")
p
# note this is nominal P < 0.05
write_csv(overall_res,paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".csv"))
write_csv(overall_res %>%
filter(bonf < 0.05) %>%
arrange(desc(beta)),paste0("~/ukb_proteomics/outputs/sig_limma_results_",plot_prefix,"_",phenotype_col,".csv"))
plotout = paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".png")
png(plotout,res=900,units="in",width=6,height=4)
print(p)
dev.off()
print(p)
}
run_limma_no_contrast_controls = function(phenotype_col, limma_dat = pheno, plot_prefix = "controls"){
dat = limma_dat %>%
filter(MS_status!="prevalent") %>%
filter(!is.na(sex_f31_0_0) &
!is.na(age_when_attended_assessment_centre_f21003_0_0) &
!is.na(body_mass_index_bmi_f21001_2_0) &
!is.na(.data[[phenotype_col]]))
dat_just_proteins = dat %>% dplyr::select(all_of(protein_list))
dat$agesq = dat$age_when_attended_assessment_centre_f21003_0_0^2
# set row names
rownames(dat_just_proteins) = dat$EID
# mean imputation
dat_just_proteins = dat_just_proteins %>%
mutate_all(~ifelse(is.na(.x),
mean(.x, na.rm = TRUE), .x))
# rank normalise outcome
hist(dat[[phenotype_col]],main="Raw")
dat[[phenotype_col]] = RNOmni::RankNorm(dat[[phenotype_col]])
hist(dat[[phenotype_col]],main="Normalised")
fmla=formula(paste0("~0 + ",phenotype_col," + body_mass_index_bmi_f21001_2_0 + Batch + age_when_attended_assessment_centre_f21003_2_0 + age_when_attended_assessment_centre_f21003_0_0 + sex_f31_0_0 + age_when_attended_assessment_centre_f21003_0_0:sex_f31_0_0 + agesq + agesq:sex_f31_0_0"))
design <- model.matrix(fmla, dat)
colnames(design)[ncol((design))-1]="agesex"
colnames(design)[ncol((design))]="agesq_sex"
fit = lmFit(t(dat_just_proteins), design) # fit model
tmp <- eBayes(fit) # EB smooth
# process results
pvals = tmp$p.value %>%
data.frame() %>%
dplyr::select(all_of(phenotype_col)) %>%
mutate(protein = rownames(tmp$p.value))
colnames(pvals) = c("pval","protein")
betas = tmp$coefficients %>%
data.frame() %>%
dplyr::select(all_of(phenotype_col)) %>%
mutate(protein = rownames(tmp$coefficients))
colnames(betas) = c("beta","protein")
overall_res = betas %>% left_join(pvals,by="protein") %>%
mutate(fdr = p.adjust(pval,method="fdr"),
bonf = p.adjust(pval,method="bonf")
)
plot_dat = overall_res %>%
mutate(sig = case_when(
bonf < 0.05 & beta > 0 ~ "Up",
bonf < 0.05 & beta < 0 ~ "Down",
bonf >= 0.05 ~ "NS"))
plot_dat %>% arrange(pval)
col_vec = c("blue","grey","red")
names(col_vec) = c("Down","NS","Up")
p = ggplot(plot_dat,
aes(beta,-log10(pval),col = sig))+
geom_point()+
scale_color_manual(values = col_vec)+
theme_minimal()+
ggrepel::geom_text_repel(data = plot_dat %>% filter(pval < 0.05),
mapping = aes(beta, -log10(pval),label = protein),
show.legend = F,force = 20,min.segment.length = 0,max.overlaps = 10)+
theme(panel.grid = element_blank())+
geom_hline(yintercept = -log10(bonf),linetype="dashed",alpha=0.3)+
geom_vline(xintercept = 0,linetype="dashed",alpha=0.3)+
ggtitle(paste0(phenotype_col,"\nN=",nrow(dat)))+
labs(x="Log fold change",y = "-log10(P)",col="Direction")
p
# note this is nominal P < 0.05
write_csv(overall_res,paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".csv"))
write_csv(overall_res %>%
filter(bonf < 0.05) %>%
arrange(desc(beta)),paste0("~/ukb_proteomics/outputs/sig_limma_results_",plot_prefix,"_",phenotype_col,".csv"))
plotout = paste0("~/ukb_proteomics/outputs/limma_results_",plot_prefix,"_",phenotype_col,".png")
png(plotout,res=900,units="in",width=6,height=4)
print(p)
dev.off()
print(p)
}
run_limma2(limma_dat = filtered_pheno, phenotype_col = "on_ifn",reflevel="no",nonref="yes")
# run limma
run_limma_no_contrast(limma_dat = filtered_pheno,phenotype_col = "normalised_t2_lesion_vol")
run_limma_no_contrast_controls(limma_dat = filtered_pheno,phenotype_col = "normalised_t2_lesion_vol")
run_limma_no_contrast_controls(limma_dat = filtered_pheno,phenotype_col = "volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0")
run_limma_no_contrast(limma_dat = filtered_pheno,phenotype_col = "volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0")
run_limma_no_contrast_agesex(limma_dat = filtered_pheno,phenotype_col = "normalised_t2_lesion_vol")
run_limma_no_contrast_agesex(limma_dat = filtered_pheno,phenotype_col = "volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0")
p1=ggplot(filtered_pheno,
aes(MS_status,normalised_t2_lesion_vol,fill=MS_status))+
geom_boxplot()+
scale_fill_brewer(palette="Set1")+
theme_minimal()+
theme(legend.position = "none")+
labs(x="MS status",y="Normalised T2 lesion volume")
p2=ggplot(filtered_pheno,
aes(MS_status,volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0,fill=MS_status))+
geom_boxplot()+
scale_fill_brewer(palette="Set1")+
theme_minimal()+
theme(legend.position = "none")+
labs(x="MS status",y="Brain volume")
p3=ggplot(filtered_pheno %>% filter(MS_status=="prevalent"),aes(total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0,volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0,col=sex_f31_0_0))+geom_point()+theme_bw()+
labs(x="T2 lesion volume",y="Brain volume",col="Sex")+
scale_color_brewer(palette="Set1")+
theme(legend.position = c(0.8,0.8))
cor.test(filtered_pheno[filtered_pheno$MS_status=="prevalent",]$total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0,filtered_pheno[filtered_pheno$MS_status=="prevalent",]$volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0 ,method="spearman")
filtered_pheno = filtered_pheno %>%
mutate(disease_duration_at_scan = age_when_attended_assessment_centre_f21003_2_0 - age_at_ms
)
p4=ggplot(filtered_pheno %>% filter(MS_status=="prevalent"),
aes(disease_duration_at_scan,total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0))+geom_point()+theme_bw()+
labs(x="Disease duration at time of scan (years)",y="T2 lesion volume")
p5=ggplot(filtered_pheno %>% filter(MS_status=="prevalent"),
aes(disease_duration_at_scan,volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0,col=sex_f31_0_0))+geom_point()+theme_bw()+
labs(x="Disease duration (years)",y="Brain volume",col="Sex")+
scale_color_brewer(palette="Set1")+
theme(legend.position = "none")
cor.test(filtered_pheno[filtered_pheno$MS_status=="prevalent",]$disease_duration_at_scan,filtered_pheno[filtered_pheno$MS_status=="prevalent",]$volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0 ,method="spearman")
cor.test(filtered_pheno[filtered_pheno$MS_status=="prevalent",]$disease_duration_at_scan,filtered_pheno[filtered_pheno$MS_status=="prevalent",]$total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0 ,method="spearman")
wilcox.test(filtered_pheno[filtered_pheno$MS_status=="prevalent" & filtered_pheno$sex_f31_0_0=="Male",]$total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0,
filtered_pheno[filtered_pheno$MS_status=="prevalent" & filtered_pheno$sex_f31_0_0=="Female",]$total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0)
wilcox.test(filtered_pheno[filtered_pheno$MS_status=="prevalent" & filtered_pheno$sex_f31_0_0=="Male",]$volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0,
filtered_pheno[filtered_pheno$MS_status=="prevalent" & filtered_pheno$sex_f31_0_0=="Female",]$volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0)
ggplot(filtered_pheno %>% filter(MS_status=="prevalent"),
aes(sex_f31_0_0,total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0 ))+geom_boxplot()+theme_bw()
ggplot(filtered_pheno %>% filter(MS_status=="prevalent"),
aes(sex_f31_0_0,volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0 ))+geom_boxplot()+theme_bw()
png("~/ukb_proteomics/outputs/mri_data.png",res=900,units="in",height=3,width=14)
gridExtra::grid.arrange(p1,p2,p3,p5,nrow=1)
dev.off()
# combine results
brainvol_res = read_csv("~/ukb_proteomics/outputs/limma_results_full_volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0.csv")
colnames(brainvol_res)[!colnames(brainvol_res)=="protein"] = paste0("brainvol_",colnames(brainvol_res)[!colnames(brainvol_res)=="protein"])
lesion_res = read_csv("~/ukb_proteomics/outputs/limma_results_full_normalised_t2_lesion_vol.csv")
colnames(lesion_res)[!colnames(lesion_res)=="protein"] = paste0("lesion_",colnames(lesion_res)[!colnames(lesion_res)=="protein"])
combo_res = brainvol_res %>%
left_join(lesion_res,by="protein") %>%
mutate(suggestive_in_both = ifelse(brainvol_pval<0.05 & lesion_pval<0.05,"*"," ")) %>%
arrange(lesion_pval)
# combine with whole cohort
brainvol_res_controls = read_csv("~/ukb_proteomics/outputs/limma_results_controls_volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0.csv")
colnames(brainvol_res_controls)[!colnames(brainvol_res_controls)=="protein"] = paste0("ctrl_brainvol_",colnames(brainvol_res)[!colnames(brainvol_res_controls)=="protein"])
lesion_res_controls = read_csv("~/ukb_proteomics/outputs/limma_results_controls_normalised_t2_lesion_vol.csv")
colnames(lesion_res_controls)[!colnames(lesion_res_controls)=="protein"] = paste0("ctrl_lesion_",colnames(lesion_res_controls)[!colnames(lesion_res_controls)=="protein"])
# join
combo_res = combo_res %>%
left_join(brainvol_res_controls,by="protein") %>%
left_join(lesion_res_controls,by="protein")
combo_res = combo_res %>%
dplyr::select(protein,colnames(combo_res)) %>%
arrange(suggestive_in_both)
# combined rank
combo_res = combo_res %>%
arrange(lesion_pval) %>%
mutate(lesion_rank = row_number()) %>%
arrange(brainvol_pval) %>%
mutate(brainvol_rank = row_number()) %>%
mutate(av_rank = (brainvol_rank + lesion_rank)/2 ) %>%
arrange(av_rank)
filtered_pheno %>%
filter(!is.na(volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_3_0)) %>%
dplyr::count(MS_status)
filtered_pheno %>%
filter(!is.na(volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0)) %>%
dplyr::count(MS_status)
# compare lesion volume
filtered_pheno %>%
group_by(MS_status) %>%
summarise(
median(total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0,na.rm=T),
IQR(total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0,na.rm=T),
median(normalised_t2_lesion_vol,na.rm = T))
filtered_pheno %>%
filter(!is.na(normalised_t2_lesion_vol)) %>%
dplyr::count(MS_status)
wilcox.test(filtered_pheno[filtered_pheno$MS_status=="control",][["total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0"]],
filtered_pheno[filtered_pheno$MS_status=="prevalent",][["total_volume_of_white_matter_hyperintensities_from_t1_and_t2_flair_images_f25781_2_0"]])
# compare brain vol
filtered_pheno %>%
group_by(MS_status) %>%
summarise(
median(volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0,na.rm=T),
IQR(volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0,na.rm=T))
filtered_pheno %>%
filter(!is.na(volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0)) %>%
dplyr::count(MS_status)
wilcox.test(filtered_pheno[filtered_pheno$MS_status=="control",][["volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0"]],
filtered_pheno[filtered_pheno$MS_status=="prevalent",][["volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0"]])
# look at atrophy
filtered_pheno %>%
filter(!is.na(volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_3_0)) %>%
dplyr::count(MS_status)
filtered_pheno = filtered_pheno %>%
mutate(atrophy = volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_2_0 - volume_of_brain_greywhite_matter_normalised_for_head_size_f25009_3_0 )
wilcox.test(filtered_pheno[filtered_pheno$MS_status=="control",][["atrophy"]],
filtered_pheno[filtered_pheno$MS_status=="prevalent",][["atrophy"]])