changing category of ambiguous muts
This commit is contained in:
parent
eb5491aad9
commit
46b43cf261
3 changed files with 55 additions and 4 deletions
|
@ -552,6 +552,14 @@ else:
|
||||||
output_cols = combined_df_all.columns
|
output_cols = combined_df_all.columns
|
||||||
|
|
||||||
#%% IMPORTANT result info
|
#%% IMPORTANT result info
|
||||||
|
if combined_df_all['or_mychisq'].isna().sum() == len(combined_df) - len(afor_df):
|
||||||
|
print('PASS: No. of NA in or_mychisq matches expected length'
|
||||||
|
, '\nNo. of with NA in or_mychisq:', combined_df_all['or_mychisq'].isna().sum()
|
||||||
|
, '\nNo. of NA in or_kin:', combined_df_all['or_kin'].isna().sum())
|
||||||
|
else:
|
||||||
|
print('FAIL: No. of NA in or_mychisq does not match expected length')
|
||||||
|
|
||||||
|
|
||||||
if combined_df_all.shape[0] == outdf_expected_rows:
|
if combined_df_all.shape[0] == outdf_expected_rows:
|
||||||
print('\nINFORMARIONAL ONLY: combined_df_all has duplicate muts present but with unique ref and alt allele'
|
print('\nINFORMARIONAL ONLY: combined_df_all has duplicate muts present but with unique ref and alt allele'
|
||||||
, '\n=============================================================')
|
, '\n=============================================================')
|
||||||
|
|
|
@ -59,18 +59,23 @@ rm(my_df, upos, dup_muts)
|
||||||
# my_df_u
|
# my_df_u
|
||||||
|
|
||||||
# quick checks
|
# quick checks
|
||||||
head(my_df_u[, c("mutation", "mutation2")])
|
head(my_df_u[, c("mutation")])
|
||||||
|
|
||||||
cols_to_extract = c("mutationinformation", "mutation", "or_mychisq", "or_kin", "af", "af_kin")
|
cols_to_extract = c("mutationinformation", "mutation", "or_mychisq", "or_kin", "af", "af_kin")
|
||||||
foo = my_df_u[, colnames(my_df_u)%in% cols_to_extract]
|
foo = my_df_u[, colnames(my_df_u)%in% cols_to_extract]
|
||||||
|
|
||||||
|
|
||||||
which(is.na(my_df_u$af_kin)) == which(is.na(my_df_u$af))
|
table(which(is.na(my_df_u$af_kin)) == which(is.na(my_df_u$af)))
|
||||||
|
|
||||||
|
baz = read.csv(file.choose())
|
||||||
|
|
||||||
baz = cbind(my_df_u$mutation, my_df_u$or_mychisq, bar$mutation, bar$or_mychisq)
|
baz = cbind(my_df_u$mutation, my_df_u$or_mychisq, bar$mutation, bar$or_mychisq)
|
||||||
|
baz = as.data.frame(baz)
|
||||||
colnames(baz) = c("my_df_u_muts", "my_df_u_or", "real_muts", "real_or")
|
colnames(baz) = c("my_df_u_muts", "my_df_u_or", "real_muts", "real_or")
|
||||||
|
sum(is.na(baz$my_df_u_or)) == sum(is.na(my_df_u$or_mychisq))
|
||||||
|
|
||||||
|
cat("\nNo. of with NA in or_mychisq:", sum(is.na(my_df_u$or_mychisq))
|
||||||
|
,"\nNo. of NA in or_kin:" , sum(is.na(my_df_u$or_kin)))
|
||||||
|
|
||||||
# infile 2: gene associated meta data
|
# infile 2: gene associated meta data
|
||||||
#in_filename_gene_metadata = paste0(tolower(gene), "_meta_data_with_AFandOR.csv")
|
#in_filename_gene_metadata = paste0(tolower(gene), "_meta_data_with_AFandOR.csv")
|
||||||
|
@ -109,7 +114,8 @@ gene_metadata <- read.csv(infile_gene_metadata
|
||||||
cat("Dim:", dim(gene_metadata))
|
cat("Dim:", dim(gene_metadata))
|
||||||
|
|
||||||
|
|
||||||
# counting NAs in AF, OR cols:
|
# counting NAs in AF, OR cols
|
||||||
|
# or_mychisq
|
||||||
if (identical(sum(is.na(my_df_u$or_mychisq))
|
if (identical(sum(is.na(my_df_u$or_mychisq))
|
||||||
, sum(is.na(my_df_u$pval_fisher))
|
, sum(is.na(my_df_u$pval_fisher))
|
||||||
, sum(is.na(my_df_u$af)))){
|
, sum(is.na(my_df_u$af)))){
|
||||||
|
@ -123,7 +129,7 @@ if (identical(sum(is.na(my_df_u$or_mychisq))
|
||||||
, "\nNA in AF:", sum(is.na(my_df_u$af)))
|
, "\nNA in AF:", sum(is.na(my_df_u$af)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# or kin
|
||||||
if (identical(sum(is.na(my_df_u$or_kin))
|
if (identical(sum(is.na(my_df_u$or_kin))
|
||||||
, sum(is.na(my_df_u$pwald_kin))
|
, sum(is.na(my_df_u$pwald_kin))
|
||||||
, sum(is.na(my_df_u$af_kin)))){
|
, sum(is.na(my_df_u$af_kin)))){
|
||||||
|
@ -139,6 +145,31 @@ if (identical(sum(is.na(my_df_u$or_kin))
|
||||||
|
|
||||||
str(gene_metadata)
|
str(gene_metadata)
|
||||||
|
|
||||||
|
# change category of ambiguous mutations
|
||||||
|
table(gene_metadata$mutation_info)
|
||||||
|
|
||||||
|
cols_to_extract2 = c("mutationinformation", "mutation", "mutation_info")
|
||||||
|
foo2 = gene_metadata[, colnames(gene_metadata)%in% cols_to_extract2]
|
||||||
|
|
||||||
|
dr_muts = foo2[foo2$mutation_info == dr_muts_col,]
|
||||||
|
other_muts = foo2[foo2$mutation_info == other_muts_col,]
|
||||||
|
|
||||||
|
common_muts = dr_muts[dr_muts$mutation%in%other_muts$mutation,]
|
||||||
|
#write.csv(common_muts, 'common_muts.csv')
|
||||||
|
|
||||||
|
# FIXME read properly
|
||||||
|
# "ambiguous_mut_names.csv"
|
||||||
|
#"pnca_p.gly108arg", "pnca_p.gly132ala", "pnca_p.val180phe"
|
||||||
|
ambiguous_muts = read.csv(file.choose())
|
||||||
|
ambiguous_muts_names = ambiguous_muts$mutation
|
||||||
|
|
||||||
|
common_muts_all = gene_metadata[gene_metadata$mutation%in%ambiguous_muts_names,]
|
||||||
|
|
||||||
|
gene_metadata2 = gene_metadata
|
||||||
|
|
||||||
|
if (gene_metadata$mutation_info[gene_metadata$mutation%in%ambiguous_muts_names] == other_muts_col){
|
||||||
|
print('change me')
|
||||||
|
}
|
||||||
###################################################################
|
###################################################################
|
||||||
# combining: PS
|
# combining: PS
|
||||||
###################################################################
|
###################################################################
|
||||||
|
|
|
@ -52,6 +52,18 @@ in_filename_params = paste0(tolower(gene), "_all_params.csv")
|
||||||
infile_params = paste0(outdir, "/", in_filename_params)
|
infile_params = paste0(outdir, "/", in_filename_params)
|
||||||
cat(paste0("Input file 1:", infile_params) )
|
cat(paste0("Input file 1:", infile_params) )
|
||||||
|
|
||||||
|
|
||||||
|
dr_muts_col = paste0('dr_mutations_', drug)
|
||||||
|
# category label for the "other" mutations of this drug; kept in its own
# variable so it does not overwrite dr_muts_col assigned just above
other_muts_col = paste0('other_mutations_', drug)
|
||||||
|
|
||||||
|
cat('Extracting columns based on variables:\n'
|
||||||
|
, drug
|
||||||
|
, '\n'
|
||||||
|
, dr_muts_col
|
||||||
|
, '\n'
|
||||||
|
, other_muts_col
|
||||||
|
, '\n===============================================================')
|
||||||
|
|
||||||
#%%===============================================================
|
#%%===============================================================
|
||||||
###########################
|
###########################
|
||||||
# Read file: struct params
|
# Read file: struct params
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue