Merge branch 'embb_dev'
This commit is contained in:
commit
4f52627740
10 changed files with 51 additions and 21 deletions
|
@ -243,7 +243,7 @@ def format_mcsm_output(mcsm_outputcsv):
|
||||||
mcsm_data = mcsm_data.drop_duplicates(['mutationinformation'])
|
mcsm_data = mcsm_data.drop_duplicates(['mutationinformation'])
|
||||||
print('Dim of data after removing duplicate muts:', mcsm_data.shape
|
print('Dim of data after removing duplicate muts:', mcsm_data.shape
|
||||||
, '\n===========================================================')
|
, '\n===========================================================')
|
||||||
#%%=====================================================================
|
#%%=====================================================================
|
||||||
#############
|
#############
|
||||||
# Create col: duet_outcome
|
# Create col: duet_outcome
|
||||||
#############
|
#############
|
||||||
|
|
|
@ -79,13 +79,13 @@ gene_match = gene + '_p.'
|
||||||
# directories
|
# directories
|
||||||
#============
|
#============
|
||||||
if not datadir:
|
if not datadir:
|
||||||
datadir = homedir + '/' + 'git/Data'
|
datadir = homedir + '/git/Data/'
|
||||||
|
|
||||||
if not indir:
|
if not indir:
|
||||||
indir = datadir + '/' + drug + '/input'
|
indir = datadir + drug + 'input/'
|
||||||
|
|
||||||
if not outdir:
|
if not outdir:
|
||||||
outdir = datadir + '/' + drug + '/output'
|
outdir = datadir + drug + 'output/'
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# input
|
# input
|
||||||
|
@ -95,7 +95,7 @@ if pdb_filename:
|
||||||
else:
|
else:
|
||||||
in_filename_pdb = gene.lower() + '_complex.pdb'
|
in_filename_pdb = gene.lower() + '_complex.pdb'
|
||||||
|
|
||||||
infile_pdb = indir + '/' + in_filename_pdb
|
infile_pdb = indir + in_filename_pdb
|
||||||
|
|
||||||
#in_filename_snps = gene.lower() + '_mcsm_snps.csv' #(outfile_mcsm_snps, from data_extraction.py)
|
#in_filename_snps = gene.lower() + '_mcsm_snps.csv' #(outfile_mcsm_snps, from data_extraction.py)
|
||||||
#infile_snps = outdir + '/' + in_filename_snps
|
#infile_snps = outdir + '/' + in_filename_snps
|
||||||
|
@ -104,8 +104,8 @@ if mutation_filename:
|
||||||
in_filename_snps = mutation_filename
|
in_filename_snps = mutation_filename
|
||||||
else:
|
else:
|
||||||
in_filename_snps = gene.lower() + '_mcsm_formatted_snps.csv'
|
in_filename_snps = gene.lower() + '_mcsm_formatted_snps.csv'
|
||||||
|
|
||||||
infile_snps = outdir + '/' + in_filename_snps
|
infile_snps = outdir + in_filename_snps
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
|
@ -113,13 +113,13 @@ infile_snps = outdir + '/' + in_filename_snps
|
||||||
# mcsm_results globals
|
# mcsm_results globals
|
||||||
if not result_urls:
|
if not result_urls:
|
||||||
result_urls_filename = gene.lower() + '_result_urls.txt'
|
result_urls_filename = gene.lower() + '_result_urls.txt'
|
||||||
result_urls = outdir + '/' + result_urls_filename
|
result_urls = outdir + result_urls_filename
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print('DEBUG: Result URLs:', result_urls)
|
print('DEBUG: Result URLs:', result_urls)
|
||||||
|
|
||||||
if not mcsm_output:
|
if not mcsm_output:
|
||||||
mcsm_output_filename = gene.lower() + '_mcsm_output.csv'
|
mcsm_output_filename = gene.lower() + '_mcsm_output.csv'
|
||||||
mcsm_output = outdir + '/' + mcsm_output_filename
|
mcsm_output = outdir + mcsm_output_filename
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print('DEBUG: mCSM output CSV file:', mcsm_output)
|
print('DEBUG: mCSM output CSV file:', mcsm_output)
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ if not mcsm_output:
|
||||||
#out_filename_format = gene.lower() + '_mcsm_processed.csv'
|
#out_filename_format = gene.lower() + '_mcsm_processed.csv'
|
||||||
if not outfile_format:
|
if not outfile_format:
|
||||||
out_filename_format = gene.lower() + '_complex_mcsm_norm.csv'
|
out_filename_format = gene.lower() + '_complex_mcsm_norm.csv'
|
||||||
outfile_format = outdir + '/' + out_filename_format
|
outfile_format = outdir + out_filename_format
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print('DEBUG: formatted CSV output:', outfile_format)
|
print('DEBUG: formatted CSV output:', outfile_format)
|
||||||
#%%=====================================================================
|
#%%=====================================================================
|
||||||
|
|
0
mcsm_na/examples.py
Normal file → Executable file
0
mcsm_na/examples.py
Normal file → Executable file
0
mcsm_na/format_results_mcsm_na.py
Normal file → Executable file
0
mcsm_na/format_results_mcsm_na.py
Normal file → Executable file
0
mcsm_na/get_results_mcsm_na.py
Normal file → Executable file
0
mcsm_na/get_results_mcsm_na.py
Normal file → Executable file
9
mcsm_na/run_format_results_mcsm_na.py
Normal file → Executable file
9
mcsm_na/run_format_results_mcsm_na.py
Normal file → Executable file
|
@ -52,15 +52,16 @@ if not outdir:
|
||||||
outdir_na = outdir + 'mcsm_na_results/'
|
outdir_na = outdir + 'mcsm_na_results/'
|
||||||
|
|
||||||
# Input file
|
# Input file
|
||||||
infile_mcsm_na = outdir_na + gene + '_output_combined_clean.tsv'
|
infile_mcsm_na = outdir_na + gene.lower() + '_output_combined_clean.tsv'
|
||||||
|
|
||||||
# Formatted output file
|
# Formatted output file
|
||||||
outfile_mcsm_na_f = outdir_na + gene + '_complex_mcsm_na_norm.csv'
|
outfile_mcsm_na_f = outdir_na + gene.lower() + '_complex_mcsm_na_norm.csv'
|
||||||
|
|
||||||
#==========================
|
#===========================================
|
||||||
# CALL: format_results_mcsm_na()
|
# CALL: format_results_mcsm_na()
|
||||||
# Data: gid+streptomycin
|
# Data: gid+streptomycin
|
||||||
#==========================
|
# Data: rpob+rifampicin, date: 18/11/2021
|
||||||
|
#===========================================
|
||||||
print('Formatting results for:', infile_mcsm_na)
|
print('Formatting results for:', infile_mcsm_na)
|
||||||
mcsm_na_df_f = format_mcsm_na_output(mcsm_na_output_tsv = infile_mcsm_na)
|
mcsm_na_df_f = format_mcsm_na_output(mcsm_na_output_tsv = infile_mcsm_na)
|
||||||
|
|
||||||
|
|
|
@ -18,14 +18,14 @@ print(my_prediction_url)
|
||||||
|
|
||||||
# TODO: add cmd line args
|
# TODO: add cmd line args
|
||||||
#gene = 'gid'
|
#gene = 'gid'
|
||||||
drug = 'streptomycin'
|
drug = ''
|
||||||
datadir = homedir + '/git/Data'
|
datadir = homedir + '/git/Data/'
|
||||||
indir = datadir + '/' + drug + '/input'
|
indir = datadir + drug + 'input/'
|
||||||
outdir = datadir + '/' + drug + '/output'
|
outdir = datadir + drug + 'output/'
|
||||||
outdir_mcsm_na = outdir + 'mcsm_na_results'
|
outdir_mcsm_na = outdir + 'mcsm_na_results/'
|
||||||
|
|
||||||
my_nuc_type = 'RNA'
|
my_nuc_type = 'RNA'
|
||||||
my_pdb_file = indir + '/gid_complex.pdb'
|
my_pdb_file = indir + gene.lower() + '_complex.pdb'
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
# batch 26: 25.txt # RAN: 16 Feb:
|
# batch 26: 25.txt # RAN: 16 Feb:
|
||||||
|
|
27
mcsm_na/split_csv.sh
Executable file
27
mcsm_na/split_csv.sh
Executable file
|
@ -0,0 +1,27 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# FIXME: This is written for expediency to kickstart running dynamut and mcsm-NA
|
||||||
|
|
||||||
|
# Usage: ~/git/LSHTM_analysis/mcsm_na/split_csv.sh <input file> <output dir> <chunk size in lines>
|
||||||
|
# copy your snp file to split into the mcsm_na dir
|
||||||
|
|
||||||
|
INFILE=$1
|
||||||
|
OUTDIR=$2
|
||||||
|
CHUNK=$3
|
||||||
|
|
||||||
|
mkdir -p ${OUTDIR}/${CHUNK}
|
||||||
|
cd ${OUTDIR}/${CHUNK}
|
||||||
|
|
||||||
|
split ../../${INFILE} -l ${CHUNK} -d snp_batch_
|
||||||
|
|
||||||
|
# use case
|
||||||
|
#~/git/LSHTM_analysis/mcsm_na/split_csv.sh gid_mcsm_formatted_snps.csv snp_batches 50
|
||||||
|
#~/git/LSHTM_analysis/mcsm_na/split_csv.sh embb_mcsm_formatted_snps.csv snp_batches 50
|
||||||
|
|
||||||
|
|
||||||
|
#~/git/LSHTM_analysis/mcsm_na/split_csv.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 20 # date: 17/11/2021
|
||||||
|
|
||||||
|
|
||||||
|
#accidentally replaced the original rpob batch files
|
||||||
|
|
||||||
|
#~/git/LSHTM_analysis/mcsm_na/split_csv.sh 5uhc_mcsm_formatted_snps_chain.csv snp_batches_5uhc 20 # date: 17/11/2021
|
0
mcsm_na/submit_mcsm_na.py
Normal file → Executable file
0
mcsm_na/submit_mcsm_na.py
Normal file → Executable file
|
@ -122,7 +122,9 @@ if gene.lower() == "gid":
|
||||||
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm_SRY.csv' # was incorrectly SAM previously
|
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm_SRY.csv' # was incorrectly SAM previously
|
||||||
if gene.lower() == "embb":
|
if gene.lower() == "embb":
|
||||||
print("\nReading mCSM file for gene:", gene)
|
print("\nReading mCSM file for gene:", gene)
|
||||||
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm1.csv'
|
#in_filename_mcsm = gene.lower() + '_complex_mcsm_norm1.csv' #798
|
||||||
|
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm2.csv' #844
|
||||||
|
|
||||||
if gene.lower() in gene_list_normal:
|
if gene.lower() in gene_list_normal:
|
||||||
print("\nReading mCSM file for gene:", gene)
|
print("\nReading mCSM file for gene:", gene)
|
||||||
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm.csv'
|
in_filename_mcsm = gene.lower() + '_complex_mcsm_norm.csv'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue