washb_prescreen. washb 0.1

Usage

washb_prescreen(Y, Ws, family = "gaussian", pval = 0.2, print = TRUE)

Arguments

Y: Outcome variable (continuous, such as LAZ, or binary, such as diarrhea)
Ws: data frame that includes candidate adjustment covariates to screen
family: GLM model family (gaussian, binomial, poisson, or negative binomial). Use "neg.binom" for Negative binomial.
pval: The p-value threshold: any variables with a p-value from the likelihood ratio test below this threshold will be returned. Defaults to 0.2
print: Logical for whether to print function output, defaults to TRUE.

Value

Function returns the list of variable names with a likelihood ratio test p-value <0.2 (unless a custom p-value is specified).

Description

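washb_prescreen: screens the candidate adjustment covariates in Ws against the outcome Y using a likelihood ratio test, and returns the names of the covariates whose p-value falls below the pval threshold.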

Examples


# Prescreen function applied to the Bangladesh diarrheal disease outcome.
# The function will test a data frame of covariates and return those related
# to child diarrheal disease with a <0.2 p-value from a likelihood ratio test.

# Load diarrhea data:
library(washb)
data(washb_bd_diar)
data(washb_bd_enrol)

# Drop svydate and month because they are superseded in the child-level
# diarrhea data
washb_bd_enrol$svydate <- NULL
washb_bd_enrol$month <- NULL

# Merge the baseline dataset to the follow-up dataset, keeping every row of
# the follow-up (diarrhea) data (all.y = TRUE) and dropping enrollment rows
# that have no follow-up match (all.x = FALSE)
ad <- merge(
  washb_bd_enrol, washb_bd_diar,
  by = c("dataid", "clusterid", "block", "tr"),
  all.x = FALSE, all.y = TRUE
)

# Subset to the relevant measurement: Year 1 or Year 2
ad <- subset(ad, svy == 1 | svy == 2)

# Subset the diarrhea data to children <36 mos at enrollment
# (exclude new births that are not target children)
ad <- subset(ad, sibnewbirth == 0)
ad <- subset(ad, gt36mos == 0)

# Exclude children with missing data
ad <- subset(ad, !is.na(diar7d))

# Re-order the tr factor for convenience
ad$tr <- factor(
  ad$tr,
  levels = c(
    "Control", "Water", "Sanitation", "Handwashing", "WSH",
    "Nutrition", "Nutrition + WSH"
  )
)

# Ensure that month is coded as a factor
ad$month <- factor(ad$month)

# Sort the data for perfect replication when using V-fold cross-validation
ad <- ad[order(ad$block, ad$clusterid, ad$dataid, ad$childid), ]


# Subset to a new data frame the variables to be screened:
Ws <- subset(
  ad,
  select = c(
    "fracode", "month", "agedays", "sex", "momage", "momedu", "momheight",
    "hfiacat", "Nlt18", "Ncomp", "watmin", "elec", "floor", "walls", "roof",
    "asset_wardrobe", "asset_table", "asset_chair", "asset_khat",
    "asset_chouki", "asset_tv", "asset_refrig", "asset_bike", "asset_moto",
    "asset_sewmach", "asset_mobile"
  )
)

# Run the washb_prescreen function (default threshold: pval = 0.2)
prescreened_varnames <- washb_prescreen(
  Y = ad$diar7d,
  Ws = Ws,
  family = "binomial"
)
Warning message:
glm.fit: fitted probabilities numerically 0 or 1 occurred

Likelihood Ratio Test P-values:
      [,1]             [,2]       
 [1,] "fracode"        "P = 0.125"
 [2,] "month"          "P = 0.000"
 [3,] "agedays"        "P = 0.000"
 [4,] "sex"            "P = 0.159"
 [5,] "momage"         "P = 0.858"
 [6,] "momedu"         "P = 0.001"
 [7,] "momheight"      "P = 0.837"
 [8,] "hfiacat"        "P = 0.000"
 [9,] "Nlt18"          "P = 0.146"
[10,] "Ncomp"          "P = 0.858"
[11,] "watmin"         "P = 0.017"
[12,] "elec"           "P = 0.002"
[13,] "floor"          "P = 0.009"
[14,] "walls"          "P = 0.173"
[15,] "roof"           "P = 0.446"
[16,] "asset_wardrobe" "P = 0.003"
[17,] "asset_table"    "P = 0.278"
[18,] "asset_chair"    "P = 0.264"
[19,] "asset_khat"     "P = 0.054"
[20,] "asset_chouki"   "P = 0.883"
[21,] "asset_tv"       "P = 0.109"
[22,] "asset_refrig"   "P = 0.015"
[23,] "asset_bike"     "P = 0.005"
[24,] "asset_moto"     "P = 0.233"
[25,] "asset_sewmach"  "P = 0.004"
[26,] "asset_mobile"   "P = 0.713"


Covariates selected (P<0.2):
      [,1]             [,2]       
 [1,] "fracode"        "P = 0.125"
 [2,] "month"          "P = 0.000"
 [3,] "agedays"        "P = 0.000"
 [4,] "sex"            "P = 0.159"
 [5,] "momedu"         "P = 0.001"
 [6,] "hfiacat"        "P = 0.000"
 [7,] "Nlt18"          "P = 0.146"
 [8,] "watmin"         "P = 0.017"
 [9,] "elec"           "P = 0.002"
[10,] "floor"          "P = 0.009"
[11,] "walls"          "P = 0.173"
[12,] "asset_wardrobe" "P = 0.003"
[13,] "asset_khat"     "P = 0.054"
[14,] "asset_tv"       "P = 0.109"
[15,] "asset_refrig"   "P = 0.015"
[16,] "asset_bike"     "P = 0.005"
[17,] "asset_sewmach"  "P = 0.004"


# Rerun the function with a more liberal p-value threshold (0.5 instead of
# the default 0.2), so that more covariates are retained
prescreened_varname2s <- washb_prescreen(
  Y = ad$diar7d,
  Ws = Ws,
  family = "binomial",
  pval = 0.5
)
Warning message:
glm.fit: fitted probabilities numerically 0 or 1 occurred

Likelihood Ratio Test P-values:
      [,1]             [,2]       
 [1,] "fracode"        "P = 0.125"
 [2,] "month"          "P = 0.000"
 [3,] "agedays"        "P = 0.000"
 [4,] "sex"            "P = 0.159"
 [5,] "momage"         "P = 0.858"
 [6,] "momedu"         "P = 0.001"
 [7,] "momheight"      "P = 0.837"
 [8,] "hfiacat"        "P = 0.000"
 [9,] "Nlt18"          "P = 0.146"
[10,] "Ncomp"          "P = 0.858"
[11,] "watmin"         "P = 0.017"
[12,] "elec"           "P = 0.002"
[13,] "floor"          "P = 0.009"
[14,] "walls"          "P = 0.173"
[15,] "roof"           "P = 0.446"
[16,] "asset_wardrobe" "P = 0.003"
[17,] "asset_table"    "P = 0.278"
[18,] "asset_chair"    "P = 0.264"
[19,] "asset_khat"     "P = 0.054"
[20,] "asset_chouki"   "P = 0.883"
[21,] "asset_tv"       "P = 0.109"
[22,] "asset_refrig"   "P = 0.015"
[23,] "asset_bike"     "P = 0.005"
[24,] "asset_moto"     "P = 0.233"
[25,] "asset_sewmach"  "P = 0.004"
[26,] "asset_mobile"   "P = 0.713"


Covariates selected (P<0.5):
      [,1]             [,2]       
 [1,] "fracode"        "P = 0.125"
 [2,] "month"          "P = 0.000"
 [3,] "agedays"        "P = 0.000"
 [4,] "sex"            "P = 0.159"
 [5,] "momedu"         "P = 0.001"
 [6,] "hfiacat"        "P = 0.000"
 [7,] "Nlt18"          "P = 0.146"
 [8,] "watmin"         "P = 0.017"
 [9,] "elec"           "P = 0.002"
[10,] "floor"          "P = 0.009"
[11,] "walls"          "P = 0.173"
[12,] "roof"           "P = 0.446"
[13,] "asset_wardrobe" "P = 0.003"
[14,] "asset_table"    "P = 0.278"
[15,] "asset_chair"    "P = 0.264"
[16,] "asset_khat"     "P = 0.054"
[17,] "asset_tv"       "P = 0.109"
[18,] "asset_refrig"   "P = 0.015"
[19,] "asset_bike"     "P = 0.005"
[20,] "asset_moto"     "P = 0.233"
[21,] "asset_sewmach"  "P = 0.004"