washb_prescreen(Y, Ws, family = "gaussian", pval = 0.2, print = TRUE)
washb_prescreen
# Example: washb_prescreen applied to the Bangladesh diarrheal disease outcome.
#
# The function tests a matrix of covariates and returns those related to child
# diarrheal disease with a < 0.2 p-value from a likelihood ratio test.
# Console output from the original example run is preserved below as `#>`
# comments so that this file remains a runnable script.

# Load diarrhea data ----
library(washb)
data(washb_bd_diar)
data(washb_bd_enrol)

# Drop svydate and month because they are superseded in the child-level
# diarrhea data
washb_bd_enrol$svydate <- NULL
washb_bd_enrol$month <- NULL

# Merge the baseline dataset to the follow-up dataset.
# all.x = FALSE / all.y = TRUE keeps every row of the follow-up (diarrhea)
# data. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
ad <- merge(
  washb_bd_enrol,
  washb_bd_diar,
  by = c("dataid", "clusterid", "block", "tr"),
  all.x = FALSE,
  all.y = TRUE
)

# Subset to the relevant measurement: Year 1 or Year 2
ad <- subset(ad, svy == 1 | svy == 2)

# Subset the diarrhea data to children < 36 months at enrollment
# (exclude new births that are not target children)
ad <- subset(ad, sibnewbirth == 0)
ad <- subset(ad, gt36mos == 0)

# Exclude children with missing data
ad <- subset(ad, !is.na(ad$diar7d))

# Re-order the tr factor for convenience
ad$tr <- factor(
  ad$tr,
  levels = c(
    "Control", "Water", "Sanitation", "Handwashing",
    "WSH", "Nutrition", "Nutrition + WSH"
  )
)

# Ensure that month is coded as a factor
ad$month <- factor(ad$month)

# Sort the data for perfect replication when using V-fold cross-validation
ad <- ad[order(ad$block, ad$clusterid, ad$dataid, ad$childid), ]

# Subset to a new data frame the variables to be screened ----
Ws <- subset(
  ad,
  select = c(
    "fracode", "month", "agedays", "sex", "momage", "momedu", "momheight",
    "hfiacat", "Nlt18", "Ncomp", "watmin", "elec", "floor", "walls", "roof",
    "asset_wardrobe", "asset_table", "asset_chair", "asset_khat",
    "asset_chouki", "asset_tv", "asset_refrig", "asset_bike", "asset_moto",
    "asset_sewmach", "asset_mobile"
  )
)

# Run the washb_prescreen function (default pval = 0.2) ----
prescreened_varnames <- washb_prescreen(Y = ad$diar7d, Ws, family = "binomial")
#> Warning message:
#> glm.fit: fitted probabilities numerically 0 or 1 occurred
#>
#> Likelihood Ratio Test P-values:
#>       [,1]             [,2]
#>  [1,] "fracode"        "P = 0.125"
#>  [2,] "month"          "P = 0.000"
#>  [3,] "agedays"        "P = 0.000"
#>  [4,] "sex"            "P = 0.159"
#>  [5,] "momage"         "P = 0.858"
#>  [6,] "momedu"         "P = 0.001"
#>  [7,] "momheight"      "P = 0.837"
#>  [8,] "hfiacat"        "P = 0.000"
#>  [9,] "Nlt18"          "P = 0.146"
#> [10,] "Ncomp"          "P = 0.858"
#> [11,] "watmin"         "P = 0.017"
#> [12,] "elec"           "P = 0.002"
#> [13,] "floor"          "P = 0.009"
#> [14,] "walls"          "P = 0.173"
#> [15,] "roof"           "P = 0.446"
#> [16,] "asset_wardrobe" "P = 0.003"
#> [17,] "asset_table"    "P = 0.278"
#> [18,] "asset_chair"    "P = 0.264"
#> [19,] "asset_khat"     "P = 0.054"
#> [20,] "asset_chouki"   "P = 0.883"
#> [21,] "asset_tv"       "P = 0.109"
#> [22,] "asset_refrig"   "P = 0.015"
#> [23,] "asset_bike"     "P = 0.005"
#> [24,] "asset_moto"     "P = 0.233"
#> [25,] "asset_sewmach"  "P = 0.004"
#> [26,] "asset_mobile"   "P = 0.713"
#>
#> Covariates selected (P<0.2):
#>       [,1]             [,2]
#>  [1,] "fracode"        "P = 0.125"
#>  [2,] "month"          "P = 0.000"
#>  [3,] "agedays"        "P = 0.000"
#>  [4,] "sex"            "P = 0.159"
#>  [5,] "momedu"         "P = 0.001"
#>  [6,] "hfiacat"        "P = 0.000"
#>  [7,] "Nlt18"          "P = 0.146"
#>  [8,] "watmin"         "P = 0.017"
#>  [9,] "elec"           "P = 0.002"
#> [10,] "floor"          "P = 0.009"
#> [11,] "walls"          "P = 0.173"
#> [12,] "asset_wardrobe" "P = 0.003"
#> [13,] "asset_khat"     "P = 0.054"
#> [14,] "asset_tv"       "P = 0.109"
#> [15,] "asset_refrig"   "P = 0.015"
#> [16,] "asset_bike"     "P = 0.005"
#> [17,] "asset_sewmach"  "P = 0.004"

# Rerun the function with a different p-value threshold ----
# pval = 0.5 is a looser (less strict) cutoff than the 0.2 default, so more
# covariates are selected (21 here versus 17 above).
prescreened_varname2s <- washb_prescreen(
  Y = ad$diar7d,
  Ws,
  family = "binomial",
  pval = 0.5
)
#> Warning message:
#> glm.fit: fitted probabilities numerically 0 or 1 occurred
#>
#> Likelihood Ratio Test P-values:
#>       [,1]             [,2]
#>  [1,] "fracode"        "P = 0.125"
#>  [2,] "month"          "P = 0.000"
#>  [3,] "agedays"        "P = 0.000"
#>  [4,] "sex"            "P = 0.159"
#>  [5,] "momage"         "P = 0.858"
#>  [6,] "momedu"         "P = 0.001"
#>  [7,] "momheight"      "P = 0.837"
#>  [8,] "hfiacat"        "P = 0.000"
#>  [9,] "Nlt18"          "P = 0.146"
#> [10,] "Ncomp"          "P = 0.858"
#> [11,] "watmin"         "P = 0.017"
#> [12,] "elec"           "P = 0.002"
#> [13,] "floor"          "P = 0.009"
#> [14,] "walls"          "P = 0.173"
#> [15,] "roof"           "P = 0.446"
#> [16,] "asset_wardrobe" "P = 0.003"
#> [17,] "asset_table"    "P = 0.278"
#> [18,] "asset_chair"    "P = 0.264"
#> [19,] "asset_khat"     "P = 0.054"
#> [20,] "asset_chouki"   "P = 0.883"
#> [21,] "asset_tv"       "P = 0.109"
#> [22,] "asset_refrig"   "P = 0.015"
#> [23,] "asset_bike"     "P = 0.005"
#> [24,] "asset_moto"     "P = 0.233"
#> [25,] "asset_sewmach"  "P = 0.004"
#> [26,] "asset_mobile"   "P = 0.713"
#>
#> Covariates selected (P<0.5):
#>       [,1]             [,2]
#>  [1,] "fracode"        "P = 0.125"
#>  [2,] "month"          "P = 0.000"
#>  [3,] "agedays"        "P = 0.000"
#>  [4,] "sex"            "P = 0.159"
#>  [5,] "momedu"         "P = 0.001"
#>  [6,] "hfiacat"        "P = 0.000"
#>  [7,] "Nlt18"          "P = 0.146"
#>  [8,] "watmin"         "P = 0.017"
#>  [9,] "elec"           "P = 0.002"
#> [10,] "floor"          "P = 0.009"
#> [11,] "walls"          "P = 0.173"
#> [12,] "roof"           "P = 0.446"
#> [13,] "asset_wardrobe" "P = 0.003"
#> [14,] "asset_table"    "P = 0.278"
#> [15,] "asset_chair"    "P = 0.264"
#> [16,] "asset_khat"     "P = 0.054"
#> [17,] "asset_tv"       "P = 0.109"
#> [18,] "asset_refrig"   "P = 0.015"
#> [19,] "asset_bike"     "P = 0.005"
#> [20,] "asset_moto"     "P = 0.233"
#> [21,] "asset_sewmach"  "P = 0.004"