#!/usr/bin/env Rscript library(bio3d) library(seqinr) library(bios2mds) library(protr) library(stringr) #################################################################### # TASK: use this to return df for AA index and mutation properties # useful for dfs ##################################################################### # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/") getwd() drug = "streptomycin" gene = "gid" source("functions/plotting_globals.R") import_dirs(drug_name = drug, gene_name = gene) ############################################################## my_fasta_file = paste0(indir, "/", gene, "_complex.fasta") my_mcsmf_snps = paste0(outdir, "/", gene, "_mcsm_formatted_snps.csv") ############################################################### #%% fasta as vector gid_aa_seq_v= read.fasta(my_fasta_file , seqtype = "AA" , as.string = F) gid_aa_v = as.character(gid_aa_seq_v[[1]]); gid_aa_v #%% fasta as string gid_aa_seq_s = read.fasta(my_fasta_file , seqtype = "AA" , as.string = T) gid_aa_s = as.character(gid_aa_seq_s[[1]]); gid_aa_s ############################################################### #=================== # AA indices # https://www.genome.jp/aaindex/AAindex/list_of_indices #=================== data(aa.index) # default aai_kd = aa2index(gid_aa_v, index = "KYTJ820101") # Hydropathy, KD aai_rv = aa2index(gid_aa_v, index = "BIGC670101") # Residue volume, Bigelow, 1967 aai_rv2 = aa2index(gid_aa_v, index = "GOLD730102") # Residue volume (Goldsack-Chalifoux, 1973) aai_b = aa2index(gid_aa_v, index = "VENT840101") # Bitterness (Venanzi, 1984) ########################################################## #=================== # mutation matrices #=================== data(sub.mat) snps = read.csv(my_mcsmf_snps , header = 0) snps colnames(snps) <- "mutationinformation" # run using all matrices sub_mat_names = as.character(unlist(attributes(sub.mat))) #sub_mat_names = "BLOSUM80" for (j in sub_mat_names){ print(j) snps[[j]] <- NA for (i in 1:nrow(snps)) { curr_snp = snps$mutationinformation[i] m1 = str_match(curr_snp, "^([A-Z]{1})[0-9]*([A-Z]{1})") aa1 = m1[,2] aa2 = m1[,3] #snps$blosum_80[i] snps[[j]][i] = sub.mat[[j]][aa1,aa2] } } snps ########################################################## gid_aac = extractAAC(gid_aa_s) gid_dc = extractDC(gid_aa_s) gid_tc = extractTC(gid_aa_s) ########################################################## # Plots par(mfrow = c(3,2)) barplot(aai_kd , main = "AA index: KD") #barplot(aai_rv , main = "AA index: Residue Volume, 1967") barplot(aai_rv2 , main = "AA index: Residue Volume") #1973 barplot(aai_b , main = "AA index: Bitterness") barplot(gid_aac , main = "AA: composition") barplot(gid_dc , main = "AA: Dipeptide composition") barplot(gid_tc , main = "AA: Tripeptide composition") ###########################################################