% Park et al. (2023), JMIR Mhealth Uhealth 11:e44296.
% Exactly one url field is kept (the publisher landing page); the DOI lives in
% the doi field and the PubMed identifier in the style-ignored pmid field.
@article{info:doi/10.2196/44296,
  author   = {Park, Junghwan and Norman, Gregory J and Klasnja, Predrag and Rivera, Daniel E and Hekler, Eric},
  title    = {Development and Validation of Multivariable Prediction Algorithms to Estimate Future Walking Behavior in Adults: Retrospective Cohort Study},
  journal  = {JMIR Mhealth Uhealth},
  year     = {2023},
  month    = jan,
  day      = {27},
  volume   = {11},
  pages    = {e44296},
  keywords = {mobile health; mHealth; physical activity; walk; prediction; classification; multilayered perceptron; microrandomized trial; MRT; just-in-time adaptive intervention; JITAI; prevention; female; development; validation; application},
  abstract = {Background: Physical inactivity is associated with numerous health risks, including cancer, cardiovascular disease, type 2 diabetes, increased health care expenditure, and preventable, premature deaths. The majority of Americans fall short of clinical guideline goals (ie, 8000-10,000 steps per day). Behavior prediction algorithms could enable efficacious interventions to promote physical activity by facilitating delivery of nudges at appropriate times. Objective: The aim of this paper is to develop and validate algorithms that predict walking (ie, >5 min) within the next 3 hours, predicted from the participants' previous 5 weeks' steps-per-minute data. Methods: We conducted a retrospective, closed cohort, secondary analysis of a 6-week microrandomized trial of the HeartSteps mobile health physical-activity intervention conducted in 2015. The prediction performance of 6 algorithms was evaluated, as follows: logistic regression, radial-basis function support vector machine, eXtreme Gradient Boosting (XGBoost), multilayered perceptron (MLP), decision tree, and random forest. For the MLP, 90 random layer architectures were tested for optimization. Prior 5-week hourly walking data, including missingness, were used for predictors. Whether the participant walked during the next 3 hours was used as the outcome. K-fold cross-validation (K=10) was used for the internal validation. The primary outcome measures are classification accuracy, the Mathew correlation coefficient, sensitivity, and specificity. Results: The total sample size included 6 weeks of data among 44 participants. Of the 44 participants, 31 (71{\%}) were female, 26 (59{\%}) were White, 36 (82{\%}) had a college degree or more, and 15 (34{\%}) were married. The mean age was 35.9 (SD 14.7) years. Participants (n=3, 7{\%}) who did not have enough data (number of days <10) were excluded, resulting in 41 (93{\%}) participants. MLP with optimized layer architecture showed the best performance in accuracy (82.0{\%}, SD 1.1), whereas XGBoost (76.3{\%}, SD 1.5), random forest (69.5{\%}, SD 1.0), support vector machine (69.3{\%}, SD 1.0), and decision tree (63.6{\%}, SD 1.5) algorithms showed lower performance than logistic regression (77.2{\%}, SD 1.2). MLP also showed superior overall performance to all other tried algorithms in Mathew correlation coefficient (0.643, SD 0.021), sensitivity (86.1{\%}, SD 3.0), and specificity (77.8{\%}, SD 3.3). Conclusions: Walking behavior prediction models were developed and validated. MLP showed the highest overall performance of all attempted algorithms. A random search for optimal layer structure is a promising approach for prediction engine development. Future studies can test the real-world application of this algorithm in a ``smart'' intervention for promoting physical activity.},
  issn     = {2291-5222},
  doi      = {10.2196/44296},
  url      = {https://mhealth.jmir.org/2023/1/e44296},
  pmid     = {36705954},
}