Merge branch 'gidb_dev'

This commit is contained in:
Tanushree Tunstall 2021-06-14 13:27:00 +01:00
commit 86ed1805fc
7 changed files with 505 additions and 7 deletions

113
scripts/aa_prop.py Normal file
View file

@ -0,0 +1,113 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on Mon June 14 2021
@author: tanu
'''
# FIXME: import dirs.py to get the basic dir paths available
#=======================================================================
# TASK
# Input:
# Output:
#=======================================================================
#%% load libraries
import os, sys
import pandas as pd
#import numpy as np
#from varname import nameof
import argparse
DEBUG = False
#=======================================================================
#%% specify input and curr dir
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
from reference_dict import oneletter_aa_dict
#=======================================================================
#%%
def get_aa_prop(df, col1 = 'aap1', col2 = 'aap2', col3 = 'aap_taylor', col4 = 'aap_kd', col5 = 'aap_polarity', col6 = 'aap_calcprop'):
"""Add amino acid properties for wt and mutant residues
@df: df containing one letter aa code for wt and mutant respectively
@type: pandas df
@col1: column adding 7 aa categories (no overlap; acidic, basic, amidic, hydrophobic, hydroxylic, aromatic, sulphur)
@type: str
@col2: column adding 9 aa categories (overlap; acidic, basic, polar, hydrophobic, hydrophilic, small, aromatic, aliphatic, special)
@type: str
@col3: column adding 8 aa categories (overlap; acidic, basic, polar, hydrophobic, small, aromatic, aliphatic, special)
@type: str
@col4: column adding 3 aa categories (no overlap, hydrophobic, neutral and hydrophilic according to KD scale)
@type: str
@col5: column adding 4 aa categories (no overlap, acidic, basic, neutral, non-polar)
@type: str
@col6: column adding 4 aa categories (neg, pos, polar, non-polar)
@type: str
returns df: with 6 added columns. If column names clash, the function column
name will override original column
@rtype: pandas df
"""
lookup_dict_p1 = dict()
lookup_dict_p2 = dict()
lookup_dict_taylor = dict()
lookup_dict_kd = dict()
lookup_dict_polarity = dict()
lookup_dict_calcprop = dict()
for k, v in oneletter_aa_dict.items():
lookup_dict_p1[k] = v['aa_prop1']
lookup_dict_p2[k] = v['aa_prop2']
lookup_dict_taylor[k] = v['aa_taylor']
lookup_dict_kd[k] = v['aa_prop_water']
lookup_dict_polarity[k] = v['aa_prop_polarity']
lookup_dict_calcprop[k] = v['aa_calcprop']
#if DEBUG:
# print('Key:', k, 'value:', v
# , '\n============================================================'
# , '\nlook up dict:\n')
df['wt_aap1'] = df['wild_type'].map(lookup_dict_p1)
df['mut_aap1'] = df['mutant_type'].map(lookup_dict_p1)
df['wt_aap2'] = df['wild_type'].map(lookup_dict_p2)
df['mut_aap2'] = df['mutant_type'].map(lookup_dict_p2)
df['wt_aap_taylor'] = df['wild_type'].map(lookup_dict_taylor)
df['mut_aap_taylor'] = df['mutant_type'].map(lookup_dict_taylor)
df['wt_aap_kd'] = df['wild_type'].map(lookup_dict_kd)
df['mut_aap_kd'] = df['mutant_type'].map(lookup_dict_kd)
df['wt_aap_polarity'] = df['wild_type'].map(lookup_dict_polarity)
df['mut_aap_polarity'] = df['mutant_type'].map(lookup_dict_polarity)
df['wt_aa_calcprop'] = df['wild_type'].map(lookup_dict_calcprop)
df['mut_aa_calcprop'] = df['mutant_type'].map(lookup_dict_calcprop)
return df
#========================================

191
scripts/add_aa_prop.py Normal file
View file

@ -0,0 +1,191 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on Tue Aug 6 12:56:03 2019
@author: tanu
'''
# FIXME: import dirs.py to get the basic dir paths available
#=======================================================================
# TASK:
#
# Input:
# Output:
#=======================================================================
#%% load libraries
import os, sys
import pandas as pd
#import numpy as np
#from varname import nameof
import argparse
#=======================================================================
#%% specify input and curr dir
homedir = os.path.expanduser('~')
# set working dir
os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd()
from aa_prop import get_aa_prop
#=======================================================================
#%% command line args
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name', default = None)
arg_parser.add_argument('-g', '--gene', help='gene name', default = None)
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + <drug> + output')
arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') # not used atm
args = arg_parser.parse_args()
#%% variable assignment: input and output
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
#%%=======================================================================
#==============
# directories
#==============
if not datadir:
datadir = homedir + '/' + 'git/Data'
if not indir:
indir = datadir + '/' + drug + '/input'
if not outdir:
outdir = datadir + '/' + drug + '/output'
#=======
# input
#=======
#in_filename = 'merged_df3.csv'
#in_filename = gene.lower() + '_complex_mcsm_norm.csv'
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm_SRY.csv' # gid
infile_mcsm = outdir + '/' + in_filename_mcsm
print('Input file: ', infile_mcsm
, '\n============================================================')
#=======
# output
#=======
out_filename_aaps = gene.lower() + '_mcsm_aaps.txt'
outfile_aaps = outdir + '/' + out_filename_aaps
print('Output file: ', outfile_aaps
, '\n============================================================')
#%% end of variable assignment for input and output files
#=======================================================================
#%% Read input files
print('Reading input file (merged file):', infile_mcsm)
comb_df = pd.read_csv(infile_mcsm, sep = ',')
print('Input filename: ', in_filename_mcsm
, '\nPath :', outdir
, '\nNo. of rows: ', len(comb_df)
, '\nNo. of cols: ', len(comb_df.columns)
, '\n============================================================')
# column names
list(comb_df.columns)
#%% sanity check
nrows_df = len(comb_df)
ncols_add = 12
expected_cols = len(comb_df.columns) + ncols_add
print('\nAdding aa properties for wt and mutant: ', ncols_add, ' columns in total'
, '\n===============================================================')
#%% call get_aa_prop():
get_aa_prop(df = comb_df)
#%% check dim of df
if len(comb_df) == nrows_df and len(comb_df.columns) == expected_cols:
print('Checking dim of df: '
, '\nPASS: df dim match'
, '\nno.of rows: ', len(comb_df)
, '\nno. of cols: ', len(comb_df.columns))
else:
print('\FAIL: dim mismatch'
, 'Expected rows: ', nrows_df
, '\Got: ', len(comb_df)
, '\nExpected cols: ', expected_cols
, '\nGot: ', len(comb_df.columns))
sys.exit('Aborting')
#%% summary output
sys.stdout = open(outfile_aaps, 'w')
print('################################################'
, '\n aa properties: ', gene
, '\n################################################')
print('\n-------------------'
, '\nWild-type: aap 1'
, '\n-------------------\n'
, comb_df['wt_aap1'].value_counts()
, '\n-------------------'
, '\nmutant-type: aap 1'
, '\n-------------------\n'
, comb_df['mut_aap1'].value_counts()
, '\n===================================================')
print('\n-------------------'
, '\nWild-type: aap 2'
, '\n-------------------\n'
, comb_df['wt_aap2'].value_counts()
, '\n-------------------'
, '\nmutant-type: aap 2'
, '\n-------------------\n'
, comb_df['mut_aap2'].value_counts()
, '\n===================================================')
print('\n-------------------'
, '\nWild-type: aap taylor'
, '\n-------------------\n'
, comb_df['wt_aap_taylor'].value_counts()
, '\n-------------------'
, '\nmutant-type: taylor'
, '\n-------------------\n'
, comb_df['mut_aap_taylor'].value_counts()
, '\n===================================================')
print('\n-------------------'
, '\nWild-type: aap water/kd'
, '\n-------------------\n'
, comb_df['wt_aap_kd'].value_counts()
, '\n-------------------'
, '\nmutant-type: water/kd'
, '\n-------------------\n'
, comb_df['mut_aap_kd'].value_counts()
, '\n===================================================')
print('\n-------------------'
, '\nWild-type: aap polarity'
, '\n-------------------\n'
, comb_df['wt_aap_polarity'].value_counts()
, '\n-------------------'
, '\nmutant-type: polarity'
, '\n-------------------\n'
, comb_df['mut_aap_polarity'].value_counts()
, '\n===================================================')
print('\n-------------------'
, '\nWild-type: aa calcprop'
, '\n-------------------\n'
, comb_df['wt_aa_calcprop'].value_counts()
, '\n-------------------'
, '\nmutant-type: aa calcprop'
, '\n-------------------\n'
, comb_df['mut_aa_calcprop'].value_counts()
, '\n===================================================')
#%% end of script
#=======================================================================

View file

@ -0,0 +1,22 @@
library(ggplot2)
library(tidyverse)
library(data.table)
########################################################################
# TASK: barplots to colour nssnps by chosen aa_property
########################################################################
aa_prop_bp <- function(plotdf
, position_colname
, fill_colname
, fill_colours
, leg_name = "LEGEND"){
mp = ggplot(df, aes(x = as.factor(eval(parse(text = pos_colname)))
#, y = ..count..
, fill = eval(parse(text = fill_colname))
)) +
geom_bar(position = 'identity') +
scale_fill_manual(values = fill_colours
, name = leg_name)
return(mp)
}

View file

@ -0,0 +1,62 @@
library(ggplot2)
library(tidyverse)
library(data.table)
setwd("~/git/LSHTM_analysis/scripts/plotting/functions")
getwd()
#############################################################
#===========================================
# load functions, data, dirs, hardocded vars
# that will be used in testing the functions
#===========================================
source("../plotting_data.R")
infile = "/home/tanu/git/Data/streptomycin/output/"
pd_df = plotting_data(infile)
my_df = pd_df[[1]]
my_df_u = pd_df[[2]]
my_df_u_lig = pd_df[[3]]
dup_muts = pd_df[[4]]
source("../plotting_globals.R")
drug = "streptomycin"
gene = "gid"
import_dirs(drug, gene)
#=====================
# functions to test
#=====================
source("stability_count_bp.R")
source("position_count_bp.R")
#################################################################
##############################################
# read a sample file containing muts and prop
###############################################
df<- read.csv(file.choose())
setDT(df)[, pos_count := .N, by = .(position)]
foo = data.frame(df$position, df$pos_count)
#snpsBYpos_df <- df %>%
# group_by(position) %>%
# summarize(snpsBYpos = mean(pos_count))
# subset df without duplicates for position
df2 = df[!duplicated(df$position)]
##################################################################
# ---------------------------------------
# barplot for nssnps, coloured by aa prop
# ---------------------------------------
pos_colname = "position"
aa_prop_colname = "mut_prop_water"
aa_prop_colours = c("black", "blue")
my_legname = "aa_prop: water"
# call function
aa_prop_bp(plotdf = df
, position_colname = pos_colname
, fill_colname = aa_prop_colname
, fill_colours = aa_prop_cols
, leg_name = my_legname)
#===============================================================

View file

@ -5,13 +5,15 @@ Created on Tue Aug 6 12:56:03 2019
@author: tanu
'''
# FIXME: import dirs.py to get the basic dir paths available
#=======================================================================
# TASK: calculate how many mutations result in
# electrostatic changes wrt wt
# Input: merged_df3
# Input: mCSM-normalised file or any file containing one-letter aa code
# of wt and mut
# Note: this can be easily modified into 3-letter code
# TODO: turn to a function
# Output: mut_elec_changes_results.txt
#=======================================================================

View file

@ -0,0 +1,106 @@
#
#
# Home
# Public
#
# Questions
# Tags
# Users
# Find a Job
# Jobs
# Companies
#
# Teams
# Stack Overflow for Teams Collaborate and share knowledge with a private group.
#
# How can put multiple plots side-by-side in shiny r?
# Asked 5 years, 5 months ago
# Active 2 years, 1 month ago
# Viewed 59k times
# 28
# 11
#
# In mainpanel, I try to handle this problem via fluidrow. However, one of my plot is optional to be displayed or not by users. When user clicks the button, the second plot appears below the first plot.
#
# fluidRow(
# column(2, align="right",
# plotOutput(outputId = "plotgraph1", width = "500px",height = "400px"),
# plotOutput(outputId = "plotgraph2", width = "500px",height = "400px")
# ))
#
# I played with "align" and "widths", but nothing changed.
# r
# plot
# ggplot2
# shiny
# Share
# Improve this question
# Follow
# edited Jan 21 '17 at 17:51
# Mike Wise
# 18.8k66 gold badges7171 silver badges9595 bronze badges
# asked Dec 20 '15 at 19:25
# can.u
# 42511 gold badge44 silver badges1111 bronze badges
# Add a comment
# 3 Answers
# 30
#
# So it is a couple years later, and while the others answers - including mine - are still valid, it is not how I would recommend approaching it today. Today I would lay it out using the grid.arrange from the gridExtra package.
#
# It allows any number of plots, and can lay them out in a grid checkerboard-like. (I was erroneously under the impression splitLayout only worked with two).
# It has more customization possibilities (you can specify rows, columns, headers, footer, padding, etc.)
# It is ultimately easier to use, even for two plots, since laying out in the UI is finicky - it can be difficult to predict what Bootstrap will do with your elements when the screen size changes.
# Since this question gets a lot of traffic, I kind of think more alternative should be here.
#
# The cowplot package is also worth looking into, it offers similar functionality, but I am not so familiar with it.
#
# Here is a small shiny program demonstrating that:
#
library(shiny)
library(ggplot2)
library(gridExtra)
u <- shinyUI(fluidPage(
titlePanel("title panel"),
sidebarLayout(position = "left",
sidebarPanel("sidebar panel",
checkboxInput("donum1", "Make #1 plot", value = T),
checkboxInput("donum2", "Make #2 plot", value = F),
checkboxInput("donum3", "Make #3 plot", value = F),
sliderInput("wt1","Weight 1",min=1,max=10,value=1),
sliderInput("wt2","Weight 2",min=1,max=10,value=1),
sliderInput("wt3","Weight 3",min=1,max=10,value=1)
),
mainPanel("main panel",
column(6,plotOutput(outputId="plotgraph", width="500px",height="400px"))
))))
s <- shinyServer(function(input, output)
{
set.seed(123)
pt1 <- reactive({
if (!input$donum1) return(NULL)
qplot(rnorm(500),fill=I("red"),binwidth=0.2,main="plotgraph1")
})
pt2 <- reactive({
if (!input$donum2) return(NULL)
qplot(rnorm(500),fill=I("blue"),binwidth=0.2,main="plotgraph2")
})
pt3 <- reactive({
if (!input$donum3) return(NULL)
qplot(rnorm(500),fill=I("green"),binwidth=0.2,main="plotgraph3")
})
output$plotgraph = renderPlot({
ptlist <- list(pt1(),pt2(),pt3())
wtlist <- c(input$wt1,input$wt2,input$wt3)
# remove the null plots from ptlist and wtlist
to_delete <- !sapply(ptlist,is.null)
ptlist <- ptlist[to_delete]
wtlist <- wtlist[to_delete]
if (length(ptlist)==0) return(NULL)
grid.arrange(grobs=ptlist,widths=wtlist,ncol=length(ptlist))
})
})
shinyApp(u,s)

View file

@ -132,9 +132,11 @@ qualities_taylor = { ('R', 'H', 'K'): 'Basic'
, ('C', 'G', 'P'): 'Special'
}
# binary classification: hydrophilic or hydrophobic
qualities_water = { ('D', 'E', 'N', 'P', 'Q', 'R', 'S'): 'hydrophilic'
, ('A', 'C', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'T', 'V', 'W', 'X', 'Y'): 'hydrophobic'
# ternary classification: hydrophobic --> neutral --> hydrophilic (KD scale)
#http://www.imgt.org/IMGTeducation/Aide-memoire/_UK/aminoacids/IMGTclasses.html
qualities_water = { ('I','V','L','F','C','M','A','W'): 'hydrophobic'
, ('G','T','S','Y','P','H'): 'neutral'
, ('N','D','Q','E','K','R'): 'hydrophilic'
}
# polarity: no overlap