Bug fixes and massive cleanup of the data extraction script

2020-03-23 13:33:25 +00:00 · 2020-03-23 13:33:25 +00:00 · eb021349fe
commit eb021349fe
parent 8df0b7d920
3 changed files with 818 additions and 454 deletions
--- a/mcsm_analysis/pyrazinamide/scripts/plotting/logolas_logoplot.R
+++ b/mcsm_analysis/pyrazinamide/scripts/plotting/logolas_logoplot.R
@@ -68,10 +68,6 @@ table(my_df$position == my_df$Position)
 c1 = unique(my_df$Position) # 130
 nrow(my_df) # 3092 
 #FIXME
 #!!! RESOLVE !!!
 # get freq count of positions and add to the df
@@ -99,9 +95,6 @@ my_data_snp = my_df[my_df$occurrence!=1,] #3072, 36...3019
 u = unique(my_data_snp$Position) #96
 ########################################################################
 #               end of data extraction and cleaning for plots          #
 ########################################################################
--- a/meta_data_analysis/.Rhistory
+++ b/meta_data_analysis/.Rhistory
@@ -1,8 +1,3 @@
 , stringsAsFactors = F)
 x = as.numeric(grepl(i,raw_data$all_muts_pza))
 # DV: pyrazinamide 0 or 1
 y = as.numeric(raw_data$pyrazinamide)
 table(y,x)
 # run glm model
 model = glm(y ~ x, family = binomial)
 #model = glm(y ~ x, family = binomial(link = "logit"))
@@ -510,3 +505,8 @@ outdir = paste0("../mcsm_analysis/",drug,"/Data/")
 outFile = "meta_data_with_AFandOR.csv"
 output_filename = paste0(outdir, outFile)
 output_filename
 pnca_snps_or = read.csv(file.choose()
 , stringsAsFactors = F
 , header = T)
 View(pnca_snps_or)
 View(pnca_snps_or)
--- a/meta_data_analysis/pnca_data_extraction.py
+++ b/meta_data_analysis/pnca_data_extraction.py