saving from work
This commit is contained in:
parent
77cc5bf42c
commit
8df0b7d920
2 changed files with 33 additions and 9 deletions
|
@ -9,6 +9,7 @@ Created on Tue Jun 25 08:46:36 2019
|
|||
# load libraries
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from Bio import SeqIO
|
||||
############################################
|
||||
#********************************************************************
|
||||
|
@ -47,7 +48,7 @@ print("Input file is:", infile_meta_data)
|
|||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir = 'git/Data/pyrazinamide/output'
|
||||
outdir = 'git/Data/pyrazinamide/output'
|
||||
# filenames in respective sections
|
||||
|
||||
################## end of variable assignment for input and output files
|
||||
|
@ -86,7 +87,7 @@ print("new length:", len(my_fasta))
|
|||
#############
|
||||
# read mutant_info file and extract cols with positions and mutant_info
|
||||
# This should be all samples with pncA muts
|
||||
#my_data = pd.read_csv('mcsm_complex1_normalised.csv') #335, 15
|
||||
#my_data = pd.read_csv('mcsm_complex1_normalised.csv')
|
||||
my_data = pd.read_csv(infile_meta_data)
|
||||
list(my_data.columns)
|
||||
#my_data['OR'].value_counts()
|
||||
|
@ -95,12 +96,34 @@ list(my_data.columns)
|
|||
#FIXME: need a more robust way to identify positions that are not in the structure
|
||||
# ideally this file should not contain any non_struc pos
|
||||
# remove positions not in the structure
|
||||
my_data = my_data[my_data.position != ns_pos_o] #3092, 22
|
||||
my_data = my_data[my_data.position != ns_pos_o]
|
||||
|
||||
# if there are multiple positions to remove, combine the conditions (or use ~my_data.position.isin([...])); see the example below:
|
||||
# https://stackoverflow.com/questions/29017525/deleting-rows-based-on-multiple-conditions-python-pandas
|
||||
#df = df[(df.one > 0) | (df.two > 0) | ((df.three > 0) & (df.four < 1))]  # note: & binds tighter than |
|
||||
|
||||
# count mutations per sample
|
||||
mut_info = my_data[['id', 'Mutationinformation', 'wild_type', 'position', 'mutant_type']]
|
||||
|
||||
# test: select rows whose Mutationinformation contains 'C72Y'
|
||||
foo = mut_info[mut_info.Mutationinformation.str.contains('C72Y')]
|
||||
|
||||
foo = mut_info.pivot_table(values = ['Mutationinformation']
|
||||
, index = ['Mutationinformation', 'id']
|
||||
# , columns =
|
||||
, aggfunc = 'count')
|
||||
|
||||
# table: count of Mutationinformation entries, with (id, Mutationinformation) as columns
|
||||
foo_tab = mut_info.pivot_table(values = ['Mutationinformation']
|
||||
# , index = ['Mutationinformation']
|
||||
, columns = ['id', 'Mutationinformation']
|
||||
, aggfunc = 'count'
|
||||
# , margins = True)
|
||||
)
|
||||
foo_tab.stack('id')
|
||||
|
||||
mut_info.to_csv('mutinfo.csv')
|
||||
|
||||
mut_info1 = my_data[['position', 'mutant_type']]
|
||||
#%%
|
||||
################
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue