# scripts/plotting/ggridges_lineage_country.R
#
# Exploratory plots of the scaled DUET score (duet_scaled) against the
# non-numerical sample annotations lineage and country_code.

#########################################################
# 1: Installing and loading required packages
#########################################################

#source("../Header_TT.R")

# Install qqman only when it is missing, so that re-running the script does
# not download and reinstall the package on every execution.
if (!requireNamespace("qqman", quietly = TRUE)) {
  install.packages("qqman")
}
library(qqman)

# Expected to define the merged data frames used below (merged_df2,
# merged_df2_comp); neither is created anywhere in this script.
# NOTE(review): ggplot2, dplyr/magrittr and ggridges are never attached here;
# presumably the sourced file attaches them -- confirm.
source("combining_dfs_plotting.R")
# Author's notes on data dimensions (rows, columns):
#   mcsm_data:  raw file, 225, 15
#   merged_df2: 2201, 35
#   merged_df3: 205, 35 ("Can't trust non-numerical params")

#===============================================
# PLOTS: DUET vs GWAS: non-numerical
# lineage, country_code, etc
# merged_df2: 1592, 35
#===============================================

#########################
# Data for plot
#########################
# The script used to assign merged_df2 and then immediately overwrite it with
# merged_df2_comp, so only the latter was ever plotted. The alternative input
# is kept here as a toggle.
# df <- merged_df2
df <- merged_df2_comp


#========================
# Plot 1a: Lineage barplot
# x    = lineage   y = No of samples
# fill = lineage
#========================
# lineage must be a factor for the subsetting and colouring below; the
# is.factor() calls only report the state before and after the conversion.
is.factor(df$lineage)
df$lineage <- as.factor(df$lineage)
is.factor(df$lineage)

table(df$lineage)

# Subset only lineages 1-4; uncomment entries to include more lineages.
sel_lineages <- c(
  "lineage1",
  "lineage2",
  "lineage3",
  "lineage4"
  # , "lineage5"
  # , "lineage6"
  # , "lineage7"
)

df_lin <- subset(df, subset = lineage %in% sel_lineages)
table(df_lin$lineage)

#%%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT: every plot from here on uses the lineage subset.
df <- df_lin
#%%%%%%%%%%%%%%%%%%%%%%%%%

############## begin plot
g <- ggplot(df, aes(x = lineage))

lineage_barplot <- g +
  geom_bar(aes(fill = lineage)) +
  theme(
    axis.text.x = element_text(size = 13, angle = 90, hjust = 1, vjust = 0.4),
    axis.text.y = element_text(size = 15, angle = 0, hjust = 1, vjust = 0),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15)
  ) +
  labs(
    title = "Lineage",
    x = "Lineage",
    y = "No of samples"
  )

# Explicit print(): a bare ggplot expression is only drawn by top-level
# auto-printing, which does not happen when this file is run via source().
print(lineage_barplot)

#========================
# Plot 2: DUET by country_code, with lineage and or_mychisq
# x    = country_code   y = duet_scaled
# col  = lineage
# size = or_mychisq
#========================

# Axis text and title sizes shared by Plot 2 and Plot 3.
axis_theme <- theme(
  axis.text.x = element_text(size = 13, angle = 90, hjust = 1, vjust = 0.4),
  axis.text.y = element_text(size = 15, angle = 0, hjust = 1, vjust = 0),
  axis.title.x = element_text(size = 15),
  axis.title.y = element_text(size = 15)
)

### begin plot
g <- ggplot(df, aes(x = country_code, y = duet_scaled))

duet_by_country <- g +
  geom_point(aes(col = lineage, size = or_mychisq)) +
  axis_theme +
  labs(
    title = "DUET, country_code, lineage, or_mychisq",
    # The x aesthetic is country_code; this label previously read "Lineage".
    x = "Country code",
    y = "DUET (PS)"
  )

# Explicit print() so the plot is also drawn when the file is run via
# source(), where top-level auto-printing is switched off.
print(duet_by_country)


#############
#========================
# Plot 3: DUET by lineage, with or_mychisq
# x    = lineage   y = duet_scaled
# col  = lineage
# size = or_mychisq
#========================

### begin plot
table(df$lineage)

g <- ggplot(df, aes(x = lineage, y = duet_scaled))

duet_by_lineage <- g +
  geom_point(aes(col = lineage, size = or_mychisq)) +
  axis_theme +
  labs(
    title = "DUET, lineage, or_mychisq",
    x = "Lineage",
    y = "DUET (PS)"
  )

print(duet_by_lineage)

#========================
# Plot 4-6: Distributions
# ggridges
#========================

# Countries need more than this many samples to be kept for the ridge plots.
min_country_samples <- 100

df$country_code_symbols <- as.character(df$country_code)

# --- Diagnostics only: foo / check* are inspected here and not used for the
# --- plots further down.
foo <- df %>%
  add_count(country_code)
# add_count() names its result column "n"; give it a descriptive name.
names(foo)[names(foo) == "n"] <- "count_country"

table(foo$count_country)

# Build the check table with typed columns. cbind() on a character and an
# integer vector yields a character matrix, and on R < 4.0 as.data.frame()
# then turns both columns into factors, for which as.numeric() returns level
# codes instead of the counts.
check <- data.frame(
  V1 = foo$country_code_symbols,
  V2 = foo$count_country,
  stringsAsFactors = FALSE
)
str(check)
check$V2 <- as.numeric(check$V2)
min(check$V2)
max(check$V2)
table(check$V2)

check2 <- subset(check, V2 > min_country_samples)
# Keep rows whose country is NOT missing; the previous is.na() filter kept
# only the missing ones, contradicting the "no missing" intent stated below.
check2 <- subset(check2, !is.na(V1))
min(check2$V2)
max(check2$V2)

check3 <- subset(check2, V1 != "")
table(check3$V1 != "")


# Subset df to countries with more than min_country_samples samples and a
# non-empty country code.

#%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT
df2 <- df
#%%%%%%%%%%%%%%%%%%%%%%%%
df2 <- df2 %>%
  add_count(country_code)

str(df2$country_code)
str(df2$n)

names(df2)[names(df2) == "n"] <- "count_country"

# Both operands come from df2 (the original mixed in df$country_code, which
# only worked because df2 has the same rows as df at this point).
table(df2$count_country > min_country_samples & df2$country_code != "")
df3 <- subset(
  df2,
  df2$count_country > min_country_samples & df2$country_code != ""
)


#%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT: the ridge plots below use the filtered data.
df <- df3
#%%%%%%%%%%%%%%%%%%%%%%%%

#==================================================
my_ats <- 15 # axis text size
my_als <- 20 # axis label size

# Facet strip labels keyed by the values of the lineage column; extend with
# lineage5-lineage7 if those are added to sel_lineages.
my_labels <- c(
  lineage1 = "Lineage 1",
  lineage2 = "Lineage 2",
  lineage3 = "Lineage 3",
  lineage4 = "Lineage 4"
)


#========================
# Plot 4: Distribution
# x     = duet_scaled
# y     = country_code
# fill  = country_code
# facet = lineage
#========================
# works neatly!

# Theme shared by the two ridge plots (Plot 4 and Plot 5). Sizes come from
# my_ats (axis text) and my_als (axis label), set earlier in the script.
ridge_theme <- theme(
  axis.text.x = element_text(
    size = my_ats,
    angle = 90,
    hjust = 1,
    vjust = 0.4
  ),
  # axis.text.y = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  axis.ticks.y = element_blank(),
  plot.title = element_blank(),
  strip.text = element_text(size = my_als),
  legend.text = element_text(size = my_als - 5),
  legend.title = element_text(size = my_als)
)

# Plot 4: one ridge per country_code, filled by country_code, one panel per
# lineage.
p1 <- ggplot(df, aes(x = duet_scaled, y = country_code)) +
  geom_density_ridges_gradient(
    aes(fill = country_code),
    scale = 3,
    size = 0.3
  ) +
  facet_wrap(
    ~lineage,
    scales = "free",
    # Replaces the deprecated `switch = 'x'`; same strip placement.
    strip.position = "bottom",
    labeller = labeller(lineage = my_labels)
  ) +
  # Zooms the x axis to [-1, 1] without dropping data from the density.
  coord_cartesian(xlim = c(-1, 1)) +
  # scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4"),
  #                      name = "DUET") +
  ridge_theme

# Explicit print() so the plot is also drawn when the file is run via
# source(), where top-level auto-printing is switched off.
print(p1)


#========================
# Plot 5: Distribution
# x     = duet_scaled
# y     = country_code
# fill  = lineage
# facet = NONE
#========================
# no facet wrap

p2 <- ggplot(df, aes(x = duet_scaled, y = country_code)) +
  geom_density_ridges_gradient(
    aes(fill = factor(lineage)),
    scale = 3,
    size = 0.3
  ) +
  coord_cartesian(xlim = c(-1, 1)) +
  # scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4"),
  #                      name = "DUET") +
  # scale_fill_continuous(colours = c("darkgreen", "pink", "orange", "brown"),
  #                       name = "lineage") +
  ridge_theme

print(p2)