correcting dtype for sfluv and h1n1v for data formatting clinical

This commit is contained in:
Tanushree Tunstall 2020-11-24 13:48:53 +00:00
parent 7529549bfc
commit 08e01abfb5
4 changed files with 52 additions and 461 deletions

View file

@ -4,14 +4,8 @@ setwd('~/git/mosaic_2020/')
# Report the current working directory (auto-printed when run at top level).
# NOTE(review): presumably a visual check that the preceding setwd() took
# effect - confirm.
getwd()
########################################################################
# TASK: Run regression analysis
# npa
# clinical params and npa meds
########################################################################
#=================================================================================
# TO DO:
# Simple stats between obesity and non-obesity groups, to consider for inclusion in the regression analysis
# Include statistically significant NPA params
# Rerun graphs and plots without asthma?
#=================================================================================
#====================
# Input: source data
@ -25,27 +19,29 @@ table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1)
# Tabulate the asthma column of clinical_df_ics.
table(clinical_df_ics$asthma)

# Sanity check: cols_to_extract must have exactly as many entries as
# clinical_cols and sig_npa_cols combined.
if (length(cols_to_extract) == length(clinical_cols) + length(sig_npa_cols)) {
  cat("PASS: extracting clinical and sign npa cols")
} else {
  cat("FAIL: could not find cols to extract")
  # Exit with a non-zero status so a batch run reports the failure; a bare
  # quit() ends the session with status 0, which reads as success.
  quit(status = 1)
}

# Keep only the selected columns. drop = FALSE stops a single-column selection
# from collapsing to a plain vector (assumes fp_adults / fp_adults_na are data
# frames - TODO confirm).
fp_adults_reg <- fp_adults[, cols_to_extract, drop = FALSE]
fp_adults_reg_na <- fp_adults_na[, cols_to_extract, drop = FALSE]
#--------------------
# Data reassignment
#--------------------
# NOTE(review): these two assignments are overwritten by the fp_adults_reg /
# fp_adults_reg_na assignments directly below, so as written they have no
# lasting effect. Looks like a superseded version of the same step - confirm.
my_data = clinical_df_ics
my_data_na = clinical_df_ics_na
# Effective working copies: the column-subsetted fp_adults_reg frames.
my_data = fp_adults_reg
my_data_na = fp_adults_reg_na
# Count rows where ia_exac_copd == 1 and asthma == 1 hold together (TRUE)
# versus not (FALSE), for each working copy.
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
# clear variables
#rm(fp_adults, fp_adults_na)
# Remove the four source data frames from the workspace; only my_data,
# my_data_na and the fp_adults_reg* objects remain from this step.
rm(fp_adults, fp_adults_na, clinical_df_ics, clinical_df_ics_na)
#########################################################################
# Sanity check: "mosaic" must be the only column of my_data whose class is
# exactly "character".
# The column names are collected first and compared with identical(), which
# always yields a single TRUE/FALSE. Comparing with `==` gives a zero-length
# or multi-element condition whenever the check should fail, and `if` then
# errors instead of reaching the FAIL branch.
my_data_char_cols <- names(my_data)[
  vapply(
    my_data,
    function(col) identical(class(col), "character"),
    logical(1)
  )
]
if (identical(my_data_char_cols, "mosaic")) {
  cat("Character class for 1 column only:", "mosaic")
} else {
  cat("More than one character class detected: Resolve!")
  # Exit with a non-zero status so a batch run reports the failure; a bare
  # quit() ends the session with status 0, which reads as success.
  quit(status = 1)
}
#============================
# Identifying column types: Reg data
#===========================
@ -57,6 +53,23 @@ my_vars = colnames(my_reg_data)
# Show the column names, then the class of every column, for inspection.
my_vars
lapply(my_reg_data, class)

# Group the column names by class label: integer, numeric, character, factor.
check_int_vars <- my_vars[lapply(my_reg_data, class) %in% "integer"]
check_num_vars <- my_vars[lapply(my_reg_data, class) %in% "numeric"]
check_charac_vars <- my_vars[lapply(my_reg_data, class) %in% "character"]
check_factor_vars <- my_vars[lapply(my_reg_data, class) %in% "factor"]

# Report how many columns fall into each class.
cat(
  "\nNo. of int cols:", length(check_int_vars),
  "\nNo. of num cols:", length(check_num_vars),
  "\nNo. of char cols:", length(check_charac_vars),
  "\nNo. of factor cols:", length(check_factor_vars)
)
# Convert character-class columns to integer, as these should be int.
# List-style indexing (no comma) keeps the selection a data frame even when
# exactly one column is character. With `[, cols]` a single column collapses
# to a plain vector, and lapply() would then walk its individual values
# instead of whole columns.
if (length(check_charac_vars) > 0) {
  my_reg_data[check_charac_vars] <- lapply(
    my_reg_data[check_charac_vars],
    as.integer
  )
}
# Show the structure of sfluv after the conversion step.
str(my_reg_data$sfluv)
numerical_vars = c("age"
, "vl_pfu_ul_npa1"
, "los"
@ -64,6 +77,11 @@ numerical_vars = c("age"
, "onsfindeath"
, "o2_sat_admis")
# Coerce every column listed in numerical_vars to numeric, in place.
my_reg_data[numerical_vars] <- lapply(
  X = my_reg_data[numerical_vars],
  FUN = as.numeric
)
# Copy the column-name vector my_vars into my_reg_params.
my_reg_params <- my_vars