Bug fixes and massive cleanup of the data extraction script

This commit is contained in:
Tanushree Tunstall 2020-03-23 13:33:25 +00:00
parent 8df0b7d920
commit eb021349fe
3 changed files with 818 additions and 454 deletions

View file

@ -68,10 +68,6 @@ table(my_df$position == my_df$Position)
c1 = unique(my_df$Position) # 130 c1 = unique(my_df$Position) # 130
nrow(my_df) # 3092 nrow(my_df) # 3092
#FIXME #FIXME
#!!! RESOLVE !!! #!!! RESOLVE !!!
# get freq count of positions and add to the df # get freq count of positions and add to the df
@ -99,9 +95,6 @@ my_data_snp = my_df[my_df$occurrence!=1,] #3072, 36...3019
u = unique(my_data_snp$Position) #96 u = unique(my_data_snp$Position) #96
######################################################################## ########################################################################
# end of data extraction and cleaning for plots # # end of data extraction and cleaning for plots #
######################################################################## ########################################################################

View file

@ -1,8 +1,3 @@
, stringsAsFactors = F)
x = as.numeric(grepl(i,raw_data$all_muts_pza))
# DV: pyrazinamide 0 or 1
y = as.numeric(raw_data$pyrazinamide)
table(y,x)
# run glm model # run glm model
model = glm(y ~ x, family = binomial) model = glm(y ~ x, family = binomial)
#model = glm(y ~ x, family = binomial(link = "logit")) #model = glm(y ~ x, family = binomial(link = "logit"))
@ -510,3 +505,8 @@ outdir = paste0("../mcsm_analysis/",drug,"/Data/")
outFile = "meta_data_with_AFandOR.csv" outFile = "meta_data_with_AFandOR.csv"
output_filename = paste0(outdir, outFile) output_filename = paste0(outdir, outFile)
output_filename output_filename
pnca_snps_or = read.csv(file.choose()
, stringsAsFactors = F
, header = T)
View(pnca_snps_or)
View(pnca_snps_or)

File diff suppressed because it is too large. [Load diff]