added aa_index/ with script that return dfs for plots for shiny perhaps
This commit is contained in:
parent
1ea42097ae
commit
9b1d1d009d
2 changed files with 186 additions and 0 deletions
85
scripts/aa_index/aa_index.R
Normal file
85
scripts/aa_index/aa_index.R
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
library(bio3d)
|
||||||
|
library(seqinr)
|
||||||
|
library(bios2mds)
|
||||||
|
library(protr)
|
||||||
|
#############################################################
|
||||||
|
#%% TASK
|
||||||
|
# use this to return df for AA index and mutation properties
|
||||||
|
|
||||||
|
source()
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
my_fasta_file = "~/git/Data/streptomycin/input/gid_complex.fasta"
|
||||||
|
my_mcsmf_snps = "~/git/Data/streptomycin/output/gid_mcsm_formatted_snps.csv"
|
||||||
|
###############################################################
|
||||||
|
#%% fasta as vector
|
||||||
|
gid_aa_seq_v= read.fasta(my_fasta_file
|
||||||
|
, seqtype = "AA"
|
||||||
|
, as.string = F)
|
||||||
|
|
||||||
|
gid_aa_v = as.character(gid_aa_seq_v[[1]]); gid_aa_v
|
||||||
|
|
||||||
|
#%% fasta as string
|
||||||
|
gid_aa_seq_s = read.fasta(my_fasta_file
|
||||||
|
, seqtype = "AA"
|
||||||
|
, as.string = T)
|
||||||
|
|
||||||
|
gid_aa_s = as.character(gid_aa_seq_s[[1]]); gid_aa_s
|
||||||
|
###############################################################
|
||||||
|
#===================
|
||||||
|
# AA indices
|
||||||
|
# https://www.genome.jp/aaindex/AAindex/list_of_indices
|
||||||
|
#===================
|
||||||
|
data(aa.index)
|
||||||
|
|
||||||
|
# default
|
||||||
|
aai_kd = aa2index(gid_aa_v, index = "KYTJ820101") # Hydropathy, KD
|
||||||
|
|
||||||
|
aai_rv = aa2index(gid_aa_v, index = "BIGC670101") # Residue volume, Bigelow, 1967
|
||||||
|
aai_rv2 = aa2index(gid_aa_v, index = "GOLD730102") # Residue volume (Goldsack-Chalifoux, 1973)
|
||||||
|
aai_b = aa2index(gid_aa_v, index = "VENT840101") # Bitterness (Venanzi, 1984)
|
||||||
|
|
||||||
|
par(mfrow = c(1,1))
|
||||||
|
barplot(aai_kd)
|
||||||
|
barplot(aai_rv)
|
||||||
|
barplot(aai_rv2)
|
||||||
|
#barplot(aai_b, col = c("black", "yellow"))
|
||||||
|
|
||||||
|
##########################################################
|
||||||
|
#===================
|
||||||
|
# mutation matrices
|
||||||
|
#===================
|
||||||
|
data(sub.mat)
|
||||||
|
snps = read.csv(my_mcsmf_snps
|
||||||
|
, header = 0)
|
||||||
|
snps
|
||||||
|
colnames(snps) <- "mutationinformation"
|
||||||
|
|
||||||
|
# run using all matrices
|
||||||
|
sub_mat_names = as.character(unlist(attributes(sub.mat)))
|
||||||
|
#sub_mat_names = "BLOSUM80"
|
||||||
|
|
||||||
|
for (j in sub_mat_names){
|
||||||
|
print(j)
|
||||||
|
snps[[j]] <- NA
|
||||||
|
for (i in 1:nrow(snps)) {
|
||||||
|
curr_snp = snps$mutationinformation[i]
|
||||||
|
m1 = str_match(curr_snp, "^([A-Z]{1})[0-9]*([A-Z]{1})")
|
||||||
|
aa1 = m1[,2]
|
||||||
|
aa2 = m1[,3]
|
||||||
|
#snps$blosum_80[i]
|
||||||
|
snps[[j]][i] = sub.mat[[j]][aa1,aa2]
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
snps
|
||||||
|
##########################################################
|
||||||
|
gid_aac = extractAAC(gid_aa_s)
|
||||||
|
gid_dc = extractDC(gid_aa_s)
|
||||||
|
gid_tc = extractTC(gid_aa_s)
|
||||||
|
|
||||||
|
par(mfrow = c(1, 3))
|
||||||
|
barplot(gid_aac)
|
||||||
|
barplot(gid_dc)
|
||||||
|
barplot(gid_tc)
|
||||||
|
###########################################################
|
101
scripts/aa_index/run_aa_index.R
Normal file
101
scripts/aa_index/run_aa_index.R
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
library(bio3d)
|
||||||
|
library(seqinr)
|
||||||
|
library(bios2mds)
|
||||||
|
library(protr)
|
||||||
|
library(stringr)
|
||||||
|
####################################################################
|
||||||
|
# TASK: use this to return df for AA index and mutation properties
|
||||||
|
# useful for dfs
|
||||||
|
|
||||||
|
|
||||||
|
#####################################################################
|
||||||
|
# working dir and loading libraries
|
||||||
|
getwd()
|
||||||
|
setwd("~/git/LSHTM_analysis/scripts/")
|
||||||
|
getwd()
|
||||||
|
|
||||||
|
drug = "streptomycin"
|
||||||
|
gene = "gid"
|
||||||
|
|
||||||
|
source("functions/plotting_globals.R")
|
||||||
|
import_dirs(drug_name = drug, gene_name = gene)
|
||||||
|
|
||||||
|
##############################################################
|
||||||
|
my_fasta_file = paste0(indir, "/", gene, "_complex.fasta")
|
||||||
|
|
||||||
|
my_mcsmf_snps = paste0(outdir, "/", gene, "_mcsm_formatted_snps.csv")
|
||||||
|
|
||||||
|
###############################################################
|
||||||
|
#%% fasta as vector
|
||||||
|
gid_aa_seq_v= read.fasta(my_fasta_file
|
||||||
|
, seqtype = "AA"
|
||||||
|
, as.string = F)
|
||||||
|
|
||||||
|
gid_aa_v = as.character(gid_aa_seq_v[[1]]); gid_aa_v
|
||||||
|
|
||||||
|
#%% fasta as string
|
||||||
|
gid_aa_seq_s = read.fasta(my_fasta_file
|
||||||
|
, seqtype = "AA"
|
||||||
|
, as.string = T)
|
||||||
|
|
||||||
|
gid_aa_s = as.character(gid_aa_seq_s[[1]]); gid_aa_s
|
||||||
|
###############################################################
|
||||||
|
#===================
|
||||||
|
# AA indices
|
||||||
|
# https://www.genome.jp/aaindex/AAindex/list_of_indices
|
||||||
|
#===================
|
||||||
|
data(aa.index)
|
||||||
|
|
||||||
|
# default
|
||||||
|
aai_kd = aa2index(gid_aa_v, index = "KYTJ820101") # Hydropathy, KD
|
||||||
|
|
||||||
|
aai_rv = aa2index(gid_aa_v, index = "BIGC670101") # Residue volume, Bigelow, 1967
|
||||||
|
aai_rv2 = aa2index(gid_aa_v, index = "GOLD730102") # Residue volume (Goldsack-Chalifoux, 1973)
|
||||||
|
aai_b = aa2index(gid_aa_v, index = "VENT840101") # Bitterness (Venanzi, 1984)
|
||||||
|
##########################################################
|
||||||
|
#===================
|
||||||
|
# mutation matrices
|
||||||
|
#===================
|
||||||
|
data(sub.mat)
|
||||||
|
snps = read.csv(my_mcsmf_snps
|
||||||
|
, header = 0)
|
||||||
|
snps
|
||||||
|
colnames(snps) <- "mutationinformation"
|
||||||
|
|
||||||
|
# run using all matrices
|
||||||
|
sub_mat_names = as.character(unlist(attributes(sub.mat)))
|
||||||
|
#sub_mat_names = "BLOSUM80"
|
||||||
|
|
||||||
|
for (j in sub_mat_names){
|
||||||
|
print(j)
|
||||||
|
snps[[j]] <- NA
|
||||||
|
for (i in 1:nrow(snps)) {
|
||||||
|
curr_snp = snps$mutationinformation[i]
|
||||||
|
m1 = str_match(curr_snp, "^([A-Z]{1})[0-9]*([A-Z]{1})")
|
||||||
|
aa1 = m1[,2]
|
||||||
|
aa2 = m1[,3]
|
||||||
|
#snps$blosum_80[i]
|
||||||
|
snps[[j]][i] = sub.mat[[j]][aa1,aa2]
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
snps
|
||||||
|
##########################################################
|
||||||
|
gid_aac = extractAAC(gid_aa_s)
|
||||||
|
gid_dc = extractDC(gid_aa_s)
|
||||||
|
gid_tc = extractTC(gid_aa_s)
|
||||||
|
|
||||||
|
##########################################################
|
||||||
|
# Plots
|
||||||
|
par(mfrow = c(3,2))
|
||||||
|
|
||||||
|
barplot(aai_kd , main = "AA index: KD")
|
||||||
|
#barplot(aai_rv , main = "AA index: Residue Volume, 1967")
|
||||||
|
barplot(aai_rv2 , main = "AA index: Residue Volume") #1973
|
||||||
|
barplot(aai_b , main = "AA index: Bitterness")
|
||||||
|
|
||||||
|
barplot(gid_aac , main = "AA: composition")
|
||||||
|
barplot(gid_dc , main = "AA: Dipeptide composition")
|
||||||
|
barplot(gid_tc , main = "AA: Tripeptide composition")
|
||||||
|
###########################################################
|
Loading…
Add table
Add a link
Reference in a new issue