This commit is contained in:
Tanushree Tunstall 2022-08-25 17:57:58 +01:00
parent cd76a4b919
commit afa9166ca8
2 changed files with 73 additions and 94 deletions

View file

@ -48,7 +48,7 @@ site_snp_count_bp <- function (plotdf
#-------------------------------------------
# adding column: snpcount for each position
#-------------------------------------------
#setDT(plotdf)[, pos_count_check := .N, by = .(eval(parse(text = df_colname)))]
#setDT(plotdf)[, position_count_check := .N, by = .(eval(parse(text = df_colname)))]
# from dplyr
plotdf = plotdf %>%
@ -57,21 +57,21 @@ site_snp_count_bp <- function (plotdf
plotdf = as.data.frame(plotdf)
class(plotdf)
nc_change = which(colnames(plotdf) == "n")
colnames(plotdf)[nc_change] <- "pos_count"
colnames(plotdf)[nc_change] <- "position_count"
class(plotdf)
# if (all(plotdf$pos_count==plotdf$pos_count_check) ){
# cat("\nPASS: pos_count column created")
# plotdf = plotdf[, !colnames(plotdf)%in%c("pos_count_check")]
# if (all(plotdf$position_count==plotdf$position_count_check) ){
# cat("\nPASS: position_count column created")
# plotdf = plotdf[, !colnames(plotdf)%in%c("position_count_check")]
# }else{
# stop("\nAbort: pos count numbers mismatch from dplyr and data.table")
# }
cat("\nCumulative nssnp count\n"
, table(plotdf$pos_count))
, table(plotdf$position_count))
# calculating total no. of mutations
tot_muts = sum(table(plotdf$pos_count))
tot_muts = sum(table(plotdf$position_count))
# sanity check
@ -92,10 +92,10 @@ site_snp_count_bp <- function (plotdf
# creating df: average SNP count for each position,
# using the count column created in the earlier step
#-------------------------------------------------------
# use group by on pos_count
# group by the df_colname column and take the mean of position_count
snpsBYpos_df <- plotdf %>%
dplyr::group_by(eval(parse(text = df_colname))) %>%
dplyr::summarise(snpsBYpos = mean(pos_count)) # changed from summarize!
dplyr::summarise(snpsBYpos = mean(position_count)) # changed from summarize!
cat("\nnssnp count per position\n"
, table(snpsBYpos_df$snpsBYpos)