Bug fixes and massive cleanup of the data extraction script
This commit is contained in:
parent
8df0b7d920
commit
eb021349fe
3 changed files with 818 additions and 454 deletions
|
@ -68,10 +68,6 @@ table(my_df$position == my_df$Position)
|
||||||
c1 = unique(my_df$Position) # 130
|
c1 = unique(my_df$Position) # 130
|
||||||
nrow(my_df) # 3092
|
nrow(my_df) # 3092
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#FIXME
|
#FIXME
|
||||||
#!!! RESOLVE !!!
|
#!!! RESOLVE !!!
|
||||||
# get freq count of positions and add to the df
|
# get freq count of positions and add to the df
|
||||||
|
@ -99,9 +95,6 @@ my_data_snp = my_df[my_df$occurrence!=1,] #3072, 36...3019
|
||||||
u = unique(my_data_snp$Position) #96
|
u = unique(my_data_snp$Position) #96
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of data extraction and cleaning for plots #
|
# end of data extraction and cleaning for plots #
|
||||||
########################################################################
|
########################################################################
|
||||||
|
|
|
@ -1,8 +1,3 @@
|
||||||
, stringsAsFactors = F)
|
|
||||||
x = as.numeric(grepl(i,raw_data$all_muts_pza))
|
|
||||||
# DV: pyrazinamide 0 or 1
|
|
||||||
y = as.numeric(raw_data$pyrazinamide)
|
|
||||||
table(y,x)
|
|
||||||
# run glm model
|
# run glm model
|
||||||
model = glm(y ~ x, family = binomial)
|
model = glm(y ~ x, family = binomial)
|
||||||
#model = glm(y ~ x, family = binomial(link = "logit"))
|
#model = glm(y ~ x, family = binomial(link = "logit"))
|
||||||
|
@ -510,3 +505,8 @@ outdir = paste0("../mcsm_analysis/",drug,"/Data/")
|
||||||
outFile = "meta_data_with_AFandOR.csv"
|
outFile = "meta_data_with_AFandOR.csv"
|
||||||
output_filename = paste0(outdir, outFile)
|
output_filename = paste0(outdir, outFile)
|
||||||
output_filename
|
output_filename
|
||||||
|
pnca_snps_or = read.csv(file.choose()
|
||||||
|
, stringsAsFactors = F
|
||||||
|
, header = T)
|
||||||
|
View(pnca_snps_or)
|
||||||
|
View(pnca_snps_or)
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue