From a5d22540e1b3db644e82e1eef757aa7cae79135e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 31 Aug 2022 22:02:16 +0100 Subject: [PATCH] renamed count_vars_ML previous version as such --- scripts/count_vars_ML.R | 214 +++----------- scripts/count_vars_ML_v1.R | 260 ++++++++++++++++++ .../plotting_thesis/alr/gg_pairs_all_alr.R | 7 +- .../embb/basic_barplots_embb.R | 4 +- .../plotting_thesis/embb/gg_pairs_all_embb.R | 5 +- .../plotting_thesis/gid/gg_pairs_all_gid.R | 2 +- .../plotting_thesis/katg/gg_pairs_all_katg.R | 15 +- .../katg/katg_ORandSNP_results.R | 8 + .../rpob/basic_barplots_rpob.R | 6 +- 9 files changed, 336 insertions(+), 185 deletions(-) create mode 100644 scripts/count_vars_ML_v1.R diff --git a/scripts/count_vars_ML.R b/scripts/count_vars_ML.R index 817b04b..461845f 100644 --- a/scripts/count_vars_ML.R +++ b/scripts/count_vars_ML.R @@ -22,13 +22,12 @@ outfile_merged_df3 = paste0(outdir, '/', tolower(gene), '_merged_df3.csv') # Add acticve site indication ############################################### merged_df2$active_site = as.integer(merged_df2$position %in% active_aa_pos) -#merged_df2_comp$active_site = as.integer(merged_df2_comp$position %in% active_aa_pos) - merged_df3$active_site = as.integer(merged_df3$position %in% active_aa_pos) -#merged_df3_comp$active_site = as.integer(merged_df3_comp$position %in% active_aa_pos) # check -cols_sel = c('mutationinformation', 'mutation_info_labels', 'dm_om_numeric', 'dst', 'dst_mode') +cols_sel = c('mutationinformation', 'mutation_info_labels' + #, 'dm_om_numeric' + , 'dst', 'dst_mode') check_mdf2 = merged_df2[, cols_sel] check_mdf2T = table(check_mdf2$mutationinformation, check_mdf2$dst_mode) @@ -42,8 +41,8 @@ dst_check = all((ft_mdf2[,1]==0)==(ft_mdf2[,2]!=0)); dst_check #======================= # CHECK: dst mode labels #======================= -table(merged_df2$mutation_info_labels_orig) -table(merged_df2$mutation_info_labels_v1) +#table(merged_df2$mutation_info_labels_orig) +#table(merged_df2$mutation_info_labels_v1) table(merged_df2$mutation_info_labels) dst_check1 = table(merged_df2$dst_mode)[1] == table(merged_df2$mutation_info_labels)[2] @@ -75,184 +74,61 @@ gene gene_match nrow(merged_df3) -########################################### -#======================== -# CHECK: drtype: revised labels [Merged_df2] -#========================= -table(merged_df2$drtype) #orig -table(merged_df2$drtype_mode) -# mapping 2.1: numeric -# drtype_map = {'XDR': 5 -# , 'Pre-XDR': 4 -# , 'MDR': 3 -# , 'Pre-MDR': 2 -# , 'Other': 1 -# , 'Sensitive': 0} - -# create a labels col that is mapped based on drtype_mode -merged_df2$drtype_mode_labels = merged_df2$drtype_mode -merged_df2$drtype_mode_labels = as.factor(merged_df2$drtype_mode) -levels(merged_df2$drtype_mode_labels) -levels(merged_df2$drtype_mode_labels) <- c('Sensitive', 'Other' - , 'Pre-MDR', 'MDR' - , 'Pre-XDR', 'XDR') -levels(merged_df2$drtype_mode_labels) -# check -a1 = all(table(merged_df2$drtype_mode) == table(merged_df2$drtype_mode_labels)) -b1 = sum(table(merged_df2$drtype_mode_labels)) == nrow(merged_df2) - -if (all(a1 && b1)){ - cat("\nPASS: added drtype mode labels to merged_df2") -}else{ - stop("FAIL: could not add drtype mode labels to merged_df2") - ##quit() -} - ################################################# - -#======================= -# CHECK: drtype: revised labels [merged_df3] -#======================= -table(merged_df3$drtype) #orig -table(merged_df3$drtype_mode) -# mapping 2.1: numeric -# drtype_map = {'XDR': 5 -# , 'Pre-XDR': 4 -# , 'MDR': 3 -# , 'Pre-MDR': 2 -# , 'Other': 1 -# , 'Sensitive': 0} - -# create a labels col that is mapped based on drtype_mode -merged_df3$drtype_mode_labels = merged_df3$drtype_mode -merged_df3$drtype_mode_labels = as.factor(merged_df3$drtype_mode) -levels(merged_df3$drtype_mode_labels) -levels(merged_df3$drtype_mode_labels) <- c('Sensitive', 'Other' - , 'Pre-MDR', 'MDR' - , 'Pre-XDR', 'XDR') -levels(merged_df3$drtype_mode_labels) -a2 = all(table(merged_df3$drtype_mode) == table(merged_df3$drtype_mode_labels)) -b2 = sum(table(merged_df3$drtype_mode_labels)) == nrow(merged_df3) -# check -if (all(a2 && b2)){ - cat("\nPASS: added drtype mode labels to merged_df3") -}else{ - stop("FAIL: could not add drtype mode labels to merged_df3") - ##quit() -} -#=============== -# CHECK: lineage -#=============== -l1 = table(merged_df3$lineage) == table(merged_df3$lineage_labels) -l2 = table(merged_df2$lineage) == table(merged_df2$lineage_labels) -l3 = sum(table(merged_df2$lineage_labels)) == nrow(merged_df2) -l4 = sum(table(merged_df3$lineage_labels)) == nrow(merged_df3) - -if (all(l1 && l2 && l3 && l4) ){ - cat("\nPASS: lineage and lineage labels are identical!") -}else{ - stop("FAIL: could not verify lineage labels") - ##quit() -} - -############################################### -# #============= -# # mutation_info: revised labels -# #============== -# table(merged_df3$mutation_info) -# sum(table(merged_df3$mutation_info)) -# table(merged_df3$mutation_info_orig) -############################################## - -# #============= -# # , dst_mode: revised labels -# #============== -# table(merged_df3$dst) # orig -# sum(table(merged_df3$dst)) -# -# table(merged_df3$dst_mode) -# #table(merged_df3[dr_muts_col]) -# sum(table(merged_df3$drtype_mode)) ############################################## -if ( all( check12 && aa_check1 && aa_check2 && a1 && b1 && a2 && b2 && l1 && l2 && l3 && l4) ){ - cat("\nWriting merged_dfs for:" - , "\nDrug:", drug - , "\nGene:", gene) - - write.csv(merged_df3, outfile_merged_df3) - #write.csv(merged_df2, outfile_merged_df2) - - cat(paste("\nmerged df3 filename:", outfile_merged_df3 +write.csv(merged_df3, outfile_merged_df3) +#write.csv(merged_df2, outfile_merged_df2) +cat(paste("\nmerged df3 filename:", outfile_merged_df3 #, "\nmerged df2 filename:", outfile_merged_df2) )) - -} else{ - stop("FAIL: Not able to write merged dfs. Please check numbers!") - #quit() -} #%%################################################################### -# check merged_df3 -check_mdf3 = merged_df3[, cols_sel] - -check_mdf3T = table(check_mdf3$mutationinformation, check_mdf3$dst_mode) -ft_mdf3 = as.data.frame.matrix(check_mdf3T) - -#================== -# CHECK: dst mode -#=================== -dst_check_mdf3 = all((ft_mdf3[,1]==0)==(ft_mdf3[,2]!=0)); dst_check_mdf3 - -sel = c("mutationinformation", "dst", "dst_mode") - -a = merged_df3[, sel] -str(a) - ################################################### ################################################### ################################################### -source("~/git/LSHTM_analysis/config/alr.R") -source("~/git/LSHTM_analysis/config/embb.R") -source("~/git/LSHTM_analysis/config/gid.R") -source("~/git/LSHTM_analysis/config/katg.R") -source("~/git/LSHTM_analysis/config/pnca.R") -source("~/git/LSHTM_analysis/config/rpob.R") -# +# source("~/git/LSHTM_analysis/config/alr.R") +# source("~/git/LSHTM_analysis/config/embb.R") +# source("~/git/LSHTM_analysis/config/gid.R") +# source("~/git/LSHTM_analysis/config/katg.R") +# source("~/git/LSHTM_analysis/config/pnca.R") +# source("~/git/LSHTM_analysis/config/rpob.R") +# # df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") df3 = read.csv(df3_filename) -# +# # # mutationinformation length(unique((df3$mutationinformation))) +# # +# # #dm _om +# table(df3$mutation_info) +# #table(df3$mutation_info_orig) +# #table(df3$mutation_info_labels_orig) # -# #dm _om -table(df3$mutation_info) -table(df3$mutation_info_orig) -table(df3$mutation_info_labels_orig) - -# used in plots and analyses -table(df3$mutation_info_labels) # different, and matches dst_mode -table(df3$dst_mode) - -# test_set -na_count <-sapply(df3, function(y) sum(length(which(is.na(y))))) -na_count[drug] +# # used in plots and analyses +# table(df3$mutation_info_labels) # different, and matches dst_mode +# table(df3$dst_mode) +# +# # test_set +# na_count <-sapply(df3, function(y) sum(length(which(is.na(y))))) +# na_count[drug] +# # +# # # training set +# table(df3[drug]) +# # +# # # drtype: MDR and XDR +# # #table(df3$drtype) orig i.e. incorrect ones! +# # table(df3$drtype_mode_labels) # -# # training set -table(df3[drug]) # -# # drtype: MDR and XDR -# #table(df3$drtype) orig i.e. incorrect ones! -# table(df3$drtype_mode_labels) - - -df3_complete = df3 -table(df3_complete$dst_mode) -comp_lin_all = df3_complete[df3_complete$lineage_labels%in%c("L1", "L2", "L3", "L4"),] -table(comp_lin_all$lineage); sum(table(comp_lin_all$lineage)) - -df3_actual = df3[!is.na(df3$dst), ] -table(df3_actual$dst_mode) -comp_lin_actual = df3_actual[df3_actual$lineage_labels%in%c("L1", "L2", "L3", "L4"),] -table(comp_lin_actual$lineage); sum(table(comp_lin_actual$lineage)) \ No newline at end of file +# df3_complete = df3 +# table(df3_complete$dst_mode) +# comp_lin_all = df3_complete[df3_complete$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +# table(comp_lin_all$lineage); sum(table(comp_lin_all$lineage)) +# +# df3_actual = df3[!is.na(df3$dst), ] +# table(df3_actual$dst_mode) +# comp_lin_actual = df3_actual[df3_actual$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +# table(comp_lin_actual$lineage); sum(table(comp_lin_actual$lineage)) +# diff --git a/scripts/count_vars_ML_v1.R b/scripts/count_vars_ML_v1.R new file mode 100644 index 0000000..a0675cd --- /dev/null +++ b/scripts/count_vars_ML_v1.R @@ -0,0 +1,260 @@ +# count numbers for ML + +source("~/git/LSHTM_analysis/config/alr.R") +#source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/katg.R") +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/rpob.R") + +############################# +# GET the actual merged dfs +############################# +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +############################# +# Output files: merged data +############################# +outfile_merged_df3 = paste0(outdir, '/', tolower(gene), '_merged_df3.csv') +#outfile_merged_df2 = paste0(outdir, '/', tolower(gene), '_merged_df2.csv') + +################################################ +# Add acticve site indication +############################################### +merged_df2$active_site = as.integer(merged_df2$position %in% active_aa_pos) +#merged_df2_comp$active_site = as.integer(merged_df2_comp$position %in% active_aa_pos) + +merged_df3$active_site = as.integer(merged_df3$position %in% active_aa_pos) +#merged_df3_comp$active_site = as.integer(merged_df3_comp$position %in% active_aa_pos) + +# check +cols_sel = c('mutationinformation', 'mutation_info_labels' + #, 'dm_om_numeric' + , 'dst', 'dst_mode') + +check_mdf2 = merged_df2[, cols_sel] +check_mdf2T = table(check_mdf2$mutationinformation, check_mdf2$dst_mode) +ft_mdf2 = as.data.frame.matrix(check_mdf2T) + +#================== +# CHECK: dst mode +#=================== +dst_check = all((ft_mdf2[,1]==0)==(ft_mdf2[,2]!=0)); dst_check + +#======================= +# CHECK: dst mode labels +#======================= +table(merged_df2$mutation_info_labels_orig) +table(merged_df2$mutation_info_labels_v1) +table(merged_df2$mutation_info_labels) + +dst_check1 = table(merged_df2$dst_mode)[1] == table(merged_df2$mutation_info_labels)[2] +dst_check2 = table(merged_df2$dst_mode)[2] == table(merged_df2$mutation_info_labels)[1] + +check12 = all(dst_check && all(dst_check1 == dst_check2)) + +if (check12) { + cat('\nPASS: dst mode labels verified. merged_df3 CAN be trusted! ') +}else{ + stop('FAIL: Something is wrong with the dst_mode column. Quitting!') +} + +table(is.na(merged_df3$dst)) + +#========================== +# CHECK: active site labels +#========================== +table(merged_df2$active_site) +table(merged_df3$active_site) +aa_check1 = all( table(merged_df2$active_site) == table(as.integer(merged_df2$position %in% active_aa_pos)) ) +aa_check2 = all( table(merged_df3$active_site) == table(as.integer(merged_df3$position %in% active_aa_pos)) ) + +if ( all(aa_check1 && aa_check2) ){ + cat('\nActive site indications successfully applied to merged_dfs for gene:', tolower(gene)) +} + +gene +gene_match + +nrow(merged_df3) +########################################### +#======================== +# CHECK: drtype: revised labels [Merged_df2] +#========================= +table(merged_df2$drtype) #orig +table(merged_df2$drtype_mode) +# mapping 2.1: numeric +# drtype_map = {'XDR': 5 +# , 'Pre-XDR': 4 +# , 'MDR': 3 +# , 'Pre-MDR': 2 +# , 'Other': 1 +# , 'Sensitive': 0} + +# create a labels col that is mapped based on drtype_mode +merged_df2$drtype_mode_labels = merged_df2$drtype_mode +merged_df2$drtype_mode_labels = as.factor(merged_df2$drtype_mode) +levels(merged_df2$drtype_mode_labels) +levels(merged_df2$drtype_mode_labels) <- c('Sensitive', 'Other' + , 'Pre-MDR', 'MDR' + , 'Pre-XDR', 'XDR') +levels(merged_df2$drtype_mode_labels) +# check +a1 = all(table(merged_df2$drtype_mode) == table(merged_df2$drtype_mode_labels)) +b1 = sum(table(merged_df2$drtype_mode_labels)) == nrow(merged_df2) + +if (all(a1 && b1)){ + cat("\nPASS: added drtype mode labels to merged_df2") +}else{ + stop("FAIL: could not add drtype mode labels to merged_df2") + ##quit() +} + ################################################# + +#======================= +# CHECK: drtype: revised labels [merged_df3] +#======================= +table(merged_df3$drtype) #orig +table(merged_df3$drtype_mode) +# mapping 2.1: numeric +# drtype_map = {'XDR': 5 +# , 'Pre-XDR': 4 +# , 'MDR': 3 +# , 'Pre-MDR': 2 +# , 'Other': 1 +# , 'Sensitive': 0} + +# create a labels col that is mapped based on drtype_mode +merged_df3$drtype_mode_labels = merged_df3$drtype_mode +merged_df3$drtype_mode_labels = as.factor(merged_df3$drtype_mode) +levels(merged_df3$drtype_mode_labels) +levels(merged_df3$drtype_mode_labels) <- c('Sensitive', 'Other' + , 'Pre-MDR', 'MDR' + , 'Pre-XDR', 'XDR') +levels(merged_df3$drtype_mode_labels) +a2 = all(table(merged_df3$drtype_mode) == table(merged_df3$drtype_mode_labels)) +b2 = sum(table(merged_df3$drtype_mode_labels)) == nrow(merged_df3) +# check +if (all(a2 && b2)){ + cat("\nPASS: added drtype mode labels to merged_df3") +}else{ + stop("FAIL: could not add drtype mode labels to merged_df3") + ##quit() +} +#=============== +# CHECK: lineage +#=============== +l1 = table(merged_df3$lineage) == table(merged_df3$lineage_labels) +l2 = table(merged_df2$lineage) == table(merged_df2$lineage_labels) +l3 = sum(table(merged_df2$lineage_labels)) == nrow(merged_df2) +l4 = sum(table(merged_df3$lineage_labels)) == nrow(merged_df3) + +if (all(l1 && l2 && l3 && l4) ){ + cat("\nPASS: lineage and lineage labels are identical!") +}else{ + stop("FAIL: could not verify lineage labels") + ##quit() +} + +############################################### +# #============= +# # mutation_info: revised labels +# #============== +# table(merged_df3$mutation_info) +# sum(table(merged_df3$mutation_info)) +# table(merged_df3$mutation_info_orig) +############################################## + +# #============= +# # , dst_mode: revised labels +# #============== +# table(merged_df3$dst) # orig +# sum(table(merged_df3$dst)) +# +# table(merged_df3$dst_mode) +# #table(merged_df3[dr_muts_col]) +# sum(table(merged_df3$drtype_mode)) + +############################################## +if ( all( check12 && aa_check1 && aa_check2 && a1 && b1 && a2 && b2 && l1 && l2 && l3 && l4) ){ + cat("\nWriting merged_dfs for:" + , "\nDrug:", drug + , "\nGene:", gene) + + write.csv(merged_df3, outfile_merged_df3) + #write.csv(merged_df2, outfile_merged_df2) + + cat(paste("\nmerged df3 filename:", outfile_merged_df3 + #, "\nmerged df2 filename:", outfile_merged_df2) + )) + +} else{ + stop("FAIL: Not able to write merged dfs. Please check numbers!") + #quit() +} + +#%%################################################################### +# check merged_df3 +check_mdf3 = merged_df3[, cols_sel] + +check_mdf3T = table(check_mdf3$mutationinformation, check_mdf3$dst_mode) +ft_mdf3 = as.data.frame.matrix(check_mdf3T) + +#================== +# CHECK: dst mode +#=================== +dst_check_mdf3 = all((ft_mdf3[,1]==0)==(ft_mdf3[,2]!=0)); dst_check_mdf3 + +sel = c("mutationinformation", "dst", "dst_mode") + +a = merged_df3[, sel] +str(a) + + +################################################### +################################################### +################################################### + +source("~/git/LSHTM_analysis/config/alr.R") +source("~/git/LSHTM_analysis/config/embb.R") +source("~/git/LSHTM_analysis/config/gid.R") +source("~/git/LSHTM_analysis/config/katg.R") +source("~/git/LSHTM_analysis/config/pnca.R") +source("~/git/LSHTM_analysis/config/rpob.R") +# +df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") +df3 = read.csv(df3_filename) +# +# mutationinformation +length(unique((df3$mutationinformation))) +# +# #dm _om +table(df3$mutation_info) +table(df3$mutation_info_orig) +table(df3$mutation_info_labels_orig) + +# used in plots and analyses +table(df3$mutation_info_labels) # different, and matches dst_mode +table(df3$dst_mode) + +# test_set +na_count <-sapply(df3, function(y) sum(length(which(is.na(y))))) +na_count[drug] +# +# # training set +table(df3[drug]) +# +# # drtype: MDR and XDR +# #table(df3$drtype) orig i.e. incorrect ones! +# table(df3$drtype_mode_labels) + + +df3_complete = df3 +table(df3_complete$dst_mode) +comp_lin_all = df3_complete[df3_complete$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +table(comp_lin_all$lineage); sum(table(comp_lin_all$lineage)) + +df3_actual = df3[!is.na(df3$dst), ] +table(df3_actual$dst_mode) +comp_lin_actual = df3_actual[df3_actual$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +table(comp_lin_actual$lineage); sum(table(comp_lin_actual$lineage)) \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/alr/gg_pairs_all_alr.R b/scripts/plotting/plotting_thesis/alr/gg_pairs_all_alr.R index 2236258..f311967 100644 --- a/scripts/plotting/plotting_thesis/alr/gg_pairs_all_alr.R +++ b/scripts/plotting/plotting_thesis/alr/gg_pairs_all_alr.R @@ -1,12 +1,13 @@ source("~/git/LSHTM_analysis/config/alr.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") -source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") #======= # output #======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") #outdir_images = paste0("/home/pub/Work/LSHTM/Thesis_Plots/pairs/") -#cat("plots will output to:", outdir_images) + +cat("plots will output to:", outdir_images) custom_cor <- function( data, @@ -190,7 +191,7 @@ unmasked_vals # Stability #================ corr_ps_colnames = c(static_cols - , "DUET" + , "mCSM-DUET" , "FoldX" , "DeepDDG" , "Dynamut2" diff --git a/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R b/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R index 4ae9f50..7eec863 100644 --- a/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R +++ b/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R @@ -7,8 +7,8 @@ #============= # Data: Input #============== -#source("~/git/LSHTM_analysis/config/embb.R") -#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +source("~/git/LSHTM_analysis/config/embb.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") #cat("\nSourced plotting cols as well:", length(plotting_cols)) diff --git a/scripts/plotting/plotting_thesis/embb/gg_pairs_all_embb.R b/scripts/plotting/plotting_thesis/embb/gg_pairs_all_embb.R index b855756..2970bb1 100644 --- a/scripts/plotting/plotting_thesis/embb/gg_pairs_all_embb.R +++ b/scripts/plotting/plotting_thesis/embb/gg_pairs_all_embb.R @@ -1,5 +1,4 @@ #source("~/git/LSHTM_analysis/config/embb.R") -#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") #source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") my_gg_pairs=function(plot_df, plot_title @@ -53,7 +52,7 @@ corr_plotdf = corr_data_extract(merged_df3 aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))] static_cols = c("Log10(MAF)" - , "Log10(OR)" + #, "Log10(OR)" ) ############################################################ #============================================= @@ -85,7 +84,7 @@ unmasked_vals # Stability #================ corr_ps_colnames = c(static_cols - , "DUET" + , "mCSM-DUET" , "FoldX" , "DeepDDG" , "Dynamut2" diff --git a/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R b/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R index 439e03a..f60ed48 100644 --- a/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R +++ b/scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R @@ -95,7 +95,7 @@ unmasked_vals # Stability #================ corr_ps_colnames = c(static_cols - , "DUET" + , "mCSM-DUET" , "FoldX" , "DeepDDG" , "Dynamut2" diff --git a/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R b/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R index ea2ba19..91be116 100644 --- a/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R +++ b/scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R @@ -1,6 +1,13 @@ -#source("~/git/LSHTM_analysis/config/katg.R") -#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") -#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +source("~/git/LSHTM_analysis/config/katg.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + + my_gg_pairs=function(plot_df, plot_title , tt_args_size = 2.5 @@ -85,7 +92,7 @@ unmasked_vals # Stability #================ corr_ps_colnames = c(static_cols - , "DUET" + , "mCSM-DUET" , "FoldX" , "DeepDDG" , "Dynamut2" diff --git a/scripts/plotting/plotting_thesis/katg/katg_ORandSNP_results.R b/scripts/plotting/plotting_thesis/katg/katg_ORandSNP_results.R index 9c4375a..4716963 100644 --- a/scripts/plotting/plotting_thesis/katg/katg_ORandSNP_results.R +++ b/scripts/plotting/plotting_thesis/katg/katg_ORandSNP_results.R @@ -203,6 +203,14 @@ write.csv(bar_or, paste0(outdir_stats, "katg_OR_10.csv")) top10_or$position[top10_or$position%in%active_aa_pos] +# maf +bar_maf = bar_or[order(bar_or$maf_percent + , bar_or$ligand_distance + # bar_or$nca_dist + , bar_or$interface_dist + , decreasing = T), ] + +head(bar_maf) ######################################################### # closest most sig bar_or_lig = bar_or[bar_or$ligand_distance<10,] diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R index 79978b8..b7aafab 100644 --- a/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R +++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R @@ -7,10 +7,10 @@ #============= # Data: Input #============== -#source("~/git/LSHTM_analysis/config/rpob.R") -#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +source("~/git/LSHTM_analysis/config/rpob.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") -#cat("\nSourced plotting cols as well:", length(plotting_cols)) +cat("\nSourced plotting cols as well:", length(plotting_cols)) #################################################### class(merged_df3)