source("~/git/LSHTM_analysis/config/alr.R") source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/config/gid.R") source("~/git/LSHTM_analysis/config/katg.R") source("~/git/LSHTM_analysis/config/pnca.R") source("~/git/LSHTM_analysis/config/rpob.R") #================================ # output files # In total: 6 files are written #================================ outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) # mutational positions: all outfile_mutpos = paste0(outdir_images, "/", tolower(gene), "_mutpos_all.txt") outfile_meta1 = paste0(outdir_images, "/", tolower(gene), "_mutpos_cu.txt") # mutational positions with sensitivity: S, R and common outfile_mutpos_S = paste0(outdir_images, "/", tolower(gene), "_mutpos_S.txt") outfile_mutpos_R = paste0(outdir_images, "/", tolower(gene), "_mutpos_R.txt") outfile_mutpos_common = paste0(outdir_images, "/", tolower(gene), "_mutpos_common.txt") outfile_meta2 = paste0(outdir_images, "/", tolower(gene), "_mutpos_annot_cu.txt") #============= # Input #============= df3_filename = paste0("~/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") df3 = read.csv(df3_filename) # Determine for each gene if (tolower(gene) == "embb"){ chain_suffix = ".B" } else{ chain_suffix = ".A" } # mut_info checks table(df3$mutation_info) table(df3$mutation_info_orig) table(df3$mutation_info_labels_orig) # used in plots and analyses table(df3$mutation_info_labels) # different, and matches dst_mode table(df3$dst_mode) # create column based on dst mode with different colname table(is.na(df3$dst)) table(is.na(df3$dst_mode)) #=============== # Create column: sensitivity mapped to dst_mode #=============== df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S") table(df3$sensitivity) length(unique((df3$mutationinformation))) all_colnames = as.data.frame(colnames(df3)) ############################################################ cols_to_extract = c("mutationinformation" , "wild_type" , "chain" , "mutant_type" , "position" , "dst_mode" , "mutation_info_labels_orig" , "mutation_info_labels" , "sensitivity") df3_plot = df3[, cols_to_extract] # create pos_chain column: allows easier colouring in chimera df3_plot$pos_chain = paste(df3_plot$position, df3_plot$chain, sep = ".") pos_cu = length(unique(df3_plot$position)) #=========================== # positions with mutations #=========================== #df3_all_mut_pos = df3_plot[, c("mutationinformation", "position", "chain")] #df3_all_mut_pos$pos_chain = paste(df3_all_mut_pos$position, df3_all_mut_pos$chain, sep = ".") df3_all_mut_pos = df3_plot[, c("position", "pos_chain")] gene_mut_pos_u = unique(df3_all_mut_pos$pos_chain) class(gene_mut_pos_u) paste(gene_mut_pos_u, collapse=',') if (length(gene_mut_pos_u) == pos_cu){ cat("\nPASS: all mutation positions extracted" , "\nWriting file:", outfile_mutpos) } else{ stop("\nAbort: mutation position count mismatch") } write.table(paste(gene_mut_pos_u, collapse=',') , outfile_mutpos , row.names = F , col.names = F) write.table(paste("Count of positions with mutations in gene" , tolower(gene), ":", pos_cu) , outfile_meta1 , row.names = F , col.names = F) #======================================== # positions with sensitivity annotations #======================================== df3_muts_annot = df3_plot[, c("mutationinformation", "position", "pos_chain", "sensitivity")] # aggregrate position count by sensitivity result <- aggregate(sensitivity ~ position, data = df3_muts_annot, paste, collapse = "") sensitive_pos = result$position[grep("(^S+$)", result$sensitivity)] sensitive_pos = paste0(sensitive_pos, chain_suffix) resistant_pos = result$position[grep("(^R+$)", result$sensitivity)] resistant_pos = paste0(resistant_pos, chain_suffix) common_pos = result$position[grep("SR|RS" , result$sensitivity)] common_pos = paste0(common_pos, chain_suffix) if (tolower(gene)!= "alr"){ length_check = length(sensitive_pos) + length(resistant_pos) + length(common_pos) cpl = length(common_pos) }else{ length_check = length(sensitive_pos) + length(resistant_pos) cpl = 0 } if (length_check == pos_cu){ cat("\nPASS: position with mutational sensitivity extracted" , "\nWriting files: sensitive, resistant and common position counts" ) } else{ stop("\nAbort: position with mutational sensitivity count mismatch") } write.table(paste(sensitive_pos, collapse = ',') , outfile_mutpos_S , row.names = F, col.names = F) write.table(paste(resistant_pos, collapse = ',') , outfile_mutpos_R , row.names = F, col.names = F) write.table(paste(common_pos, collapse = ',') , outfile_mutpos_common , row.names = F, col.names = F) write.table(paste("Count of positions with mutations in gene:" , tolower(gene) , "\nTotal mutational positions:", pos_cu , "\nsensitive:", length(sensitive_pos) , "\nresistant:", length(resistant_pos) , "\ncommon:" , cpl) , outfile_meta2 , row.names = F , col.names = F)