Bug fixes and massive cleanup of data extraction script

2020-03-23 13:33:25 +00:00 · 2020-03-23 13:33:25 +00:00 · 53d19d5dd8
commit 53d19d5dd8
parent a5356cf88b
3 changed files with 818 additions and 454 deletions
--- a/mcsm_analysis/pyrazinamide/scripts/plotting/logolas_logoplot.R
+++ b/mcsm_analysis/pyrazinamide/scripts/plotting/logolas_logoplot.R
@@ -68,10 +68,6 @@ table(my_df$position == my_df$Position)
 c1 = unique(my_df$Position) # 130
 nrow(my_df) # 3092 

-
-
-
-
 #FIXME
 #!!! RESOLVE !!!
 # get freq count of positions and add to the df
@@ -99,9 +95,6 @@ my_data_snp = my_df[my_df$occurrence!=1,] #3072, 36...3019
 u = unique(my_data_snp$Position) #96


-
-
-
 ########################################################################
 #               end of data extraction and cleaning for plots          #
 ########################################################################
--- a/meta_data_analysis/.Rhistory
+++ b/meta_data_analysis/.Rhistory
@@ -1,8 +1,3 @@
-, stringsAsFactors = F)
-x = as.numeric(grepl(i,raw_data$all_muts_pza))
-# DV: pyrazinamide 0 or 1
-y = as.numeric(raw_data$pyrazinamide)
-table(y,x)
 # run glm model
 model = glm(y ~ x, family = binomial)
 #model = glm(y ~ x, family = binomial(link = "logit"))
@@ -510,3 +505,8 @@ outdir = paste0("../mcsm_analysis/",drug,"/Data/")
 outFile = "meta_data_with_AFandOR.csv"
 output_filename = paste0(outdir, outFile)
 output_filename
+pnca_snps_or = read.csv(file.choose()
+, stringsAsFactors = F
+, header = T)
+View(pnca_snps_or)
+View(pnca_snps_or)
--- a/meta_data_analysis/pnca_data_extraction.py
+++ b/meta_data_analysis/pnca_data_extraction.py