diff --git a/scripts/mut_landscape.R b/scripts/plotting/mut_landscape.R similarity index 100% rename from scripts/mut_landscape.R rename to scripts/plotting/mut_landscape.R diff --git a/scripts/plotting/mut_landscape_5uhc_rpob.R b/scripts/plotting/mut_landscape_5uhc_rpob.R new file mode 100644 index 0000000..53f95d0 --- /dev/null +++ b/scripts/plotting/mut_landscape_5uhc_rpob.R @@ -0,0 +1,191 @@ +source("~/git/LSHTM_analysis/config/rpob.R") + +#================================ +# output files +# In total: 6 files are written +#================================ +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) + +# mutational positions: all +outfile_mutpos = paste0(outdir_images, "/5uhc_", tolower(gene), "_mutpos_all.txt") +outfile_meta1 = paste0(outdir_images, "/5uhc_", tolower(gene), "_mutpos_cu.txt") + +# mutational positions with sensitivity: S, R and common +outfile_mutpos_S = paste0(outdir_images, "/5uhc_", tolower(gene), "_mutpos_S.txt") +outfile_mutpos_R = paste0(outdir_images, "/5uhc_", tolower(gene), "_mutpos_R.txt") +outfile_mutpos_common = paste0(outdir_images, "/5uhc_", tolower(gene), "_mutpos_common.txt") +outfile_meta2 = paste0(outdir_images, "/5uhc_", tolower(gene), "_mutpos_annot_cu.txt") + +#============= +# Input +#============= +df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") +df3 = read.csv(df3_filename) + +chain_suffix = ".C" + +# mut_info checks +table(df3$mutation_info) +table(df3$mutation_info_orig) +table(df3$mutation_info_labels_orig) + +# used in plots and analyses +table(df3$mutation_info_labels) # different, and matches dst_mode +table(df3$dst_mode) + +# create column based on dst mode with different colname +table(is.na(df3$dst)) +table(is.na(df3$dst_mode)) + +#=============== +# Create column: sensitivity mapped to dst_mode +#=============== +df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S") +table(df3$sensitivity) + +length(unique((df3$mutationinformation))) +all_colnames = as.data.frame(colnames(df3)) + +############################################################ +cols_to_extract = c("mutationinformation" + , "wild_type" + , "chain" + , "mutant_type" + , "position" + , "X5uhc_position" + , "X5uhc_offset" + , "dst_mode" + , "mutation_info_labels_orig" + , "mutation_info_labels" + , "sensitivity") + +df3_plot = df3[, cols_to_extract] + +# use the x5uhc_position column + +# create pos_chain column: allows easier colouring in chimera +df3_plot$pos_chain = paste(df3_plot$X5uhc_position, chain_suffix, sep = ".") +pos_cu = length(unique(df3_plot$X5uhc_position)) +X5uhc_pos = unique(df3_plot$X5uhc_position) +X5uhc_pos = paste0(X5uhc_pos, chain_suffix) + +#=========================== +# positions with mutations +#=========================== +df3_all_mut_pos = df3_plot[, c("X5uhc_position", "pos_chain")] +gene_mut_pos_u = unique(df3_all_mut_pos$pos_chain) +class(gene_mut_pos_u) +paste(gene_mut_pos_u, collapse=',') + +if (length(gene_mut_pos_u) == pos_cu){ + cat("\nPASS: all mutation positions extracted" + , "\nWriting file:", outfile_mutpos) +} else{ + stop("\nAbort: mutation position count mismatch") +} + +write.table(paste(gene_mut_pos_u, collapse=',') + , outfile_mutpos + , row.names = F + , col.names = F) + +write.table(paste("Count of positions with mutations in gene" + , tolower(gene), ":", pos_cu) + , outfile_meta1 + , row.names = F + , col.names = F) +#======================================== +# positions with sensitivity annotations +#======================================== +df3_muts_annot = df3_plot[, c("mutationinformation", "X5uhc_position", "pos_chain", "sensitivity")] + +# aggregrate position count by sensitivity +result <- aggregate(sensitivity ~ X5uhc_position, data = df3_muts_annot, paste, collapse = "") + +sensitive_pos = result$X5uhc_position[grep("(^S+$)", result$sensitivity)] +sensitive_pos = paste0(sensitive_pos, chain_suffix) + +resistant_pos = result$X5uhc_position[grep("(^R+$)", result$sensitivity)] +resistant_pos = paste0(resistant_pos, chain_suffix) + +common_pos = result$X5uhc_position[grep("SR|RS" , result$sensitivity)] +common_pos = paste0(common_pos, chain_suffix) + +if (tolower(gene)!= "alr"){ + length_check = length(sensitive_pos) + length(resistant_pos) + length(common_pos) + cpl = length(common_pos) +}else{ + length_check = length(sensitive_pos) + length(resistant_pos) + cpl = 0 +} + +if (length_check == pos_cu){ + cat("\nPASS: position with mutational sensitivity extracted" + , "\nWriting files: sensitive, resistant and common position counts" ) +} else{ + stop("\nAbort: position with mutational sensitivity count mismatch") +} +# spl handling for rpob 5uhc +revised_gene_mut_pos_u = c(sensitive_pos, resistant_pos, common_pos) +revised_pos_cu = length(unique(revised_gene_mut_pos_u)) +if (length(revised_gene_mut_pos_u) == revised_pos_cu){ + cat("\nPASS: all mutation positions extracted" + , "\nWriting file:", outfile_mutpos) +} else{ + stop("\nAbort: mutation position count mismatch") +} + +write.table(paste(revised_gene_mut_pos_u, collapse=',') + , outfile_mutpos + , row.names = F + , col.names = F) + +write.table(paste("Count of positions with mutations in gene" + , tolower(gene), ":", revised_pos_cu) + , outfile_meta1 + , row.names = F + , col.names = F) + +# mut_annot +write.table(paste(sensitive_pos, collapse = ',') + , outfile_mutpos_S + , row.names = F, col.names = F) + +write.table(paste(resistant_pos, collapse = ',') + , outfile_mutpos_R + , row.names = F, col.names = F) + +write.table(paste(common_pos, collapse = ',') + , outfile_mutpos_common + , row.names = F, col.names = F) + +write.table(paste("Count of positions with mutations in gene:" + , tolower(gene) + , "\nTotal mutational positions:", revised_pos_cu + , "\nsensitive:", length(sensitive_pos) + , "\nresistant:", length(resistant_pos) + , "\ncommon:" , cpl) + , outfile_meta2 + , row.names = F + , col.names = F) + +#Quick check to find out the discrepancy +revised_gene_mut_pos_u +gene_mut_pos_u + +library("qpcR") +foo <- data.frame(qpcR:::cbind.na(gene_mut_pos_u, revised_gene_mut_pos_u)) + + +table(!gene_mut_pos_u%in%revised_gene_mut_pos_u) +table(!revised_gene_mut_pos_u%in%gene_mut_pos_u) + +X5uhc_pos +#table(!gene_mut_pos_u%in%X5uhc_pos) +table(X5uhc_pos%in%gene_mut_pos_u) + +X5uhc_pos[!X5uhc_pos%in%gene_mut_pos_u] +X5uhc_pos[!gene_mut_pos_u%in%X5uhc_pos] + +#TODO: NOTE +#D1148G (i.e D1154) is NOT Present in 5UHC