ks test script added

This commit is contained in:
Tanushree Tunstall 2020-09-07 15:27:53 +01:00
parent b4affa0c94
commit 648be02665
4 changed files with 211 additions and 117 deletions

View file

@ -1,45 +0,0 @@
#!/usr/bin/env python3
#=======================================================================
#%% useful links
#https://towardsdatascience.com/autoviz-automatically-visualize-any-dataset-ba2691a8b55a
#https://pypi.org/project/autoviz/
#=======================================================================
import os, sys
import pandas as pd
import numpy as np
import re
import argparse
from autoviz.AutoViz_Class import AutoViz_Class
# Exploratory script: run AutoViz on the gene's combined parameters csv and
# report which csv columns are absent from the dataframe AutoViz hands back.
homedir = os.path.expanduser('~')
# Run from the project's scripts directory
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
#%%============================================================================
# variables
gene = 'pncA'
drug = 'pyrazinamide'
#%%============================================================================
#==============
# directories
#==============
datadir = homedir + '/' + 'git/Data'
indir = datadir + '/' + drug + '/input'
outdir = datadir + '/' + drug + '/output'
#=======
# input
#=======
# The file to plot is read from the drug's *output* directory.
in_filename_plotting = gene.lower() + '_all_params.csv'
infile_plotting = outdir + '/' + in_filename_plotting
print('plotting file: ', infile_plotting
      , '\n============================================================')
#=======================================================================
# Load the csv independently so its columns can be compared with AutoViz's
plotting_df = pd.read_csv(infile_plotting, sep = ',')
#Instantiate the AutoViz class
AV = AutoViz_Class()
# AutoViz is passed the file path here (not the dataframe loaded above)
df = AV.AutoViz(infile_plotting)
#df2 = AV.AutoViz(plotting_df)
# Columns present in the csv but not in the object returned by AutoViz.
# Bind and print the result: as a bare expression it is discarded when the
# file is run as a script rather than evaluated interactively.
missing_cols = plotting_df.columns[~plotting_df.columns.isin(df.columns)]
print('columns not in AutoViz output: ', list(missing_cols))

View file

@ -1,4 +1,3 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Basic lineage barplot showing numbers

View file

@ -1,71 +0,0 @@
#=============
# merged_df2
#=============
----------------
# no. of samples
----------------
Var1 Freq
1 8
2 lineage1 144
3 lineage1;lineage2 3
4 lineage1;lineage4 4
5 lineage2 1886
6 lineage2;lineage4 19
7 lineage3 190
8 lineage3;lineage4 11
9 lineage4 2213
10 lineage4;lineage6 1
11 lineage4;lineage7 1
12 lineage4;lineageBOV 1
13 lineage5 31
14 lineage6 9
15 lineage7 3
16 lineageBOV 392
----------------
# no. of nsSNPs
----------------
sel_lineages num_snps_u total_samples
1 lineage1 74 144
2 lineage2 277 1886
3 lineage3 104 190
4 lineage4 311 2213
5 lineage5 18 31
6 lineage6 8 9
7 lineage7 1 3
#=============
# merged_df2_comp
#=============
----------------
# no. of samples
----------------
Var1 Freq
1 3
2 lineage1 108
3 lineage1;lineage2 2
4 lineage1;lineage4 2
5 lineage2 1497
6 lineage2;lineage4 13
7 lineage3 154
8 lineage3;lineage4 3
9 lineage4 1846
10 lineage4;lineageBOV 1
11 lineage5 12
12 lineage6 2
13 lineageBOV 36
----------------
# no. of nsSNPs
----------------
sel_lineages num_snps_u total_samples
1 lineage1 42 108
2 lineage2 141 1497
3 lineage3 75 154
4 lineage4 148 1846
5 lineage5 9 12
6 lineage6 2 2
7 lineage7 0 0