<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMU</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Mhealth Uhealth</journal-id>
      <journal-title>JMIR mHealth and uHealth</journal-title>
      <issn pub-type="epub">2291-5222</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i2e11201</article-id>
      <article-id pub-id-type="pmid">30730297</article-id>
      <article-id pub-id-type="doi">10.2196/11201</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Applying Multivariate Segmentation Methods to Human Activity Recognition From Wearable Sensors’ Data</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Focsa</surname>
            <given-names>Mircea</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Albert</surname>
            <given-names>Mark</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Krishnan</surname>
            <given-names>Sri</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="author" id="contrib1" corresp="yes">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Kenan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Preventive Medicine</institution>
            <institution>Keck School of Medicine of University of Southern California</institution>
            <addr-line>Soto Building Room 202-09</addr-line>
            <addr-line>2001 North Soto Street</addr-line>
            <addr-line>Los Angeles, CA, 90089</addr-line>
            <country>United States</country>
            <phone>1 2256102559</phone>
            <email>kenanl@usc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4641-6699</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib2">
          <name name-style="western">
            <surname>Habre</surname>
            <given-names>Rima</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2103-1706</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib3">
          <name name-style="western">
            <surname>Deng</surname>
            <given-names>Huiyu</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8239-2532</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib4">
          <name name-style="western">
            <surname>Urman</surname>
            <given-names>Robert</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2941-8964</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib5">
          <name name-style="western">
            <surname>Morrison</surname>
            <given-names>John</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1478-9965</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib6">
          <name name-style="western">
            <surname>Gilliland</surname>
            <given-names>Frank D</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-9033-7269</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib7">
          <name name-style="western">
            <surname>Ambite</surname>
            <given-names>José Luis</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-0087-080X</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib8">
          <name name-style="western">
            <surname>Stripelis</surname>
            <given-names>Dimitris</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3343-8335</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib9">
          <name name-style="western">
            <surname>Chiang</surname>
            <given-names>Yao-Yi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-8923-0130</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib10">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Yijun</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-0815-9636</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib11">
          <name name-style="western">
            <surname>Bui</surname>
            <given-names>Alex AT</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-4702-1373</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib12">
          <name name-style="western">
            <surname>King</surname>
            <given-names>Christine</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-7646-1028</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib13">
          <name name-style="western">
            <surname>Hosseini</surname>
            <given-names>Anahita</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-4560-7394</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib14">
          <name name-style="western">
            <surname>Vliet</surname>
            <given-names>Eleanne Van</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-5997-3040</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib15">
          <name name-style="western">
            <surname>Sarrafzadeh</surname>
            <given-names>Majid</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-8407-8689</ext-link>
        </contrib>
        <contrib contrib-type="author" id="contrib16">
          <name name-style="western">
            <surname>Eckel</surname>
            <given-names>Sandrah P</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6050-7880</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
      <label>1</label>
      <institution>Department of Preventive Medicine</institution>
      <institution>Keck School of Medicine of University of Southern California</institution>  
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country></aff>
      <aff id="aff2">
      <label>2</label>
      <institution>Information Sciences Institute</institution>
      <institution>University of Southern California</institution>  
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country></aff>
      <aff id="aff3">
      <label>3</label>
      <institution>Spatial Sciences Institute</institution>
      <institution>University of Southern California</institution>  
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country></aff>
      <aff id="aff4">
      <label>4</label>
      <institution>Department of Radiological Sciences</institution>
      <institution>University of California Los Angeles</institution>  
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country></aff>
      <aff id="aff5">
      <label>5</label>
      <institution>Department of Biomedical Engineering</institution>
      <institution>University of California, Irvine</institution>  
      <addr-line>Irvine, CA</addr-line>
      <country>United States</country></aff>
      <aff id="aff6">
      <label>6</label>
      <institution>Department of Computer Science</institution>
      <institution>University of California Los Angeles</institution>  
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country></aff>
      <author-notes>
        <corresp>Corresponding Author: Kenan Li 
        <email>kenanl@usc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection"><month>02</month><year>2019</year></pub-date>
      <pub-date pub-type="epub">
        <day>07</day>
        <month>02</month>
        <year>2019</year>
      </pub-date>
      <volume>7</volume>
      <issue>2</issue>
      <elocation-id>e11201</elocation-id>
      <!--history from ojs - api-xml-->
      <history>
        <date date-type="received">
          <day>1</day>
          <month>6</month>
          <year>2018</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>7</month>
          <year>2018</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>9</month>
          <year>2018</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>11</month>
          <year>2018</year>
        </date>
      </history>
      <copyright-statement>©Kenan Li, Rima Habre, Huiyu Deng, Robert Urman, John Morrison, Frank D Gilliland, José Luis Ambite, Dimitris Stripelis, Yao-Yi Chiang, Yijun Lin, Alex AT Bui, Christine King, Anahita Hosseini, Eleanne Van Vliet, Majid Sarrafzadeh, Sandrah P Eckel. Originally published in JMIR Mhealth and Uhealth (http://mhealth.jmir.org), 07.02.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR mhealth and uhealth, is properly cited. The complete bibliographic information, a link to the original publication on http://mhealth.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://mhealth.jmir.org/2019/2/e11201/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Time-resolved quantification of physical activity can contribute to both personalized medicine and epidemiological research studies, for example, managing and identifying triggers of asthma exacerbations. A growing number of reportedly accurate machine learning algorithms for human activity recognition (HAR) have been developed using data from wearable devices (eg, smartwatch and smartphone). However, many HAR algorithms depend on fixed-size sampling windows that may poorly adapt to real-world conditions in which activity bouts are of unequal duration. A small sliding window can produce noisy predictions under stable conditions, whereas a large sliding window may miss brief bursts of intense activity.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to create an HAR framework adapted to variable duration activity bouts by (1) detecting the change points of activity bouts in a multivariate time series and (2) predicting activity for each homogeneous window defined by these change points.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We applied standard fixed-width sliding windows (4-6 different sizes) or greedy Gaussian segmentation (GGS) to identify break points in filtered triaxial accelerometer and gyroscope data. After standard feature engineering, we applied an Xgboost model to predict physical activity within each window and then converted windowed predictions to instantaneous predictions to facilitate comparison across segmentation methods. We applied these methods in 2 datasets: the <italic>human activity recognition using smartphones</italic> (<italic>HARuS</italic>) dataset where a total of 30 adults performed activities of approximately equal duration (approximately 20 seconds each) while wearing a waist-worn smartphone, and the Biomedical REAl-Time Health Evaluation for Pediatric Asthma (<italic>BREATHE</italic>) dataset where a total of 14 children performed 6 activities for approximately 10 min each while wearing a smartwatch. To mimic a real-world scenario, we generated artificial unequal activity bout durations in the BREATHE data by randomly subdividing each activity bout into 10 segments and randomly concatenating the 60 activity bouts. Each dataset was divided into ~90% training and ~10% holdout testing.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In the HARuS data, GGS produced the least noisy predictions of 6 physical activities and had the second highest accuracy rate of 91.06% (the highest accuracy rate was 91.79% for the sliding window of size 0.8 second). In the BREATHE data, GGS again produced the least noisy predictions and had the highest accuracy rate of 79.4% of predictions for 6 physical activities.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>In a scenario with variable duration activity bouts, GGS multivariate segmentation produced <italic>smart-sized</italic> windows with more stable predictions and a higher accuracy rate than traditional fixed-size sliding window approaches. Overall, accuracy was good in both datasets but, as expected, it was slightly lower in the more real-world study using wrist-worn smartwatches in children (BREATHE) than in the more tightly controlled study using waist-worn smartphones in adults (HARuS). We implemented GGS in an offline setting, but it could be adapted for real-time prediction with streaming data.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>physical activity</kwd>
        <kwd>smartphone</kwd>
        <kwd>statistical data analysis</kwd>
        <kwd>wearable devices</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Time-resolved quantification of physical activity is important because physical activity is linked with human health. Physical activity has direct health benefits, and the American College of Sports Medicine and the Centers for Disease Control and Prevention [<xref ref-type="bibr" rid="ref1">1</xref>] publish physical activity guidelines to promote and maintain public health (eg, children should do at least 60 min of physical activity per day). Physical activity also has indirect effects on health by modifying exposures of pollutants. The National Human Activity Pattern Survey [<xref ref-type="bibr" rid="ref2">2</xref>] found that human activity patterns play a key role in explaining variation in pollutant exposures—by impacting the timing, location, and degree of exposures—and related health outcomes. It follows that high-resolution time-resolved monitoring of human activity may have clinical and research applications. Not only could a person’s moderate-to-vigorous activity (or inactivity) be logged to quantify typical spatio-temporal patterns but deviations from the typical routine could also be identified as possible targets for intervention. The widespread use of wearable smartphones and smartwatches, together with advances in communication, computation, and sensing capabilities, makes real-time human activity recognition (HAR) possible by providing remote data acquisition and on-device processing.</p>
        <p>Indeed, wearable sensors and mobile devices are being increasingly used in studies assessing physical activity, sleep, mobility, medication adherence, and a variety of other areas [<xref ref-type="bibr" rid="ref3">3</xref>]. Our study is motivated by the “Pediatric Research using Integrated Sensor Monitoring Systems” (PRISMS) program—launched in 2015 by the National Institute of Biomedical Imaging and Bioengineering—to develop a sensor-based, integrated health monitoring system for studying pediatric asthma. Asthma is a heterogeneous, multifactorial disease that is one of the most common causes of emergency hospital visits in children [<xref ref-type="bibr" rid="ref4">4</xref>]. Important risk factors for asthma exacerbation include allergen and air pollutant exposures and viral infection [<xref ref-type="bibr" rid="ref4">4</xref>], but physical activity also plays an important role in asthma incidence [<xref ref-type="bibr" rid="ref5">5</xref>], acute symptoms [<xref ref-type="bibr" rid="ref6">6</xref>], and long-term control [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. In a framework such as PRISMS, HAR may facilitate the management of asthma and the identification of triggers of exacerbation.</p>
      </sec>
      <sec>
        <title>Windowing in Human Activity Recognition Modeling Approaches</title>
        <p>Data for HAR are increasingly collected using wearable sensors (eg, accelerometers and gyroscopes) that permit continuous, real-time monitoring [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Most HAR studies summarize accelerometer and gyroscope data streams—as well as the resulting instantaneous activity predictions—using a time-based windowing approach. The reasons for this are two-fold. First, the typical duration of human activities is significantly longer than the sensors’ sampling rate (eg, 10-50 Hz). Second, raw data from an accelerometer or gyroscope are highly variable, noisy, and oscillatory, so instantaneous raw values may provide insufficient information to differentiate the associated activity. The size of the window is constrained by the sensor sampling frequency and is an important parameter that affects the accuracy of the HAR prediction, the computational loads of the algorithm, and the energy consumption on the wearable device. When selecting the size of a fixed-size window, there is a trade-off between being too short (captures fine details and produces noisy predictions) and being too long (misses short-duration activity bouts and produces more stable predictions). In a platform such as PRISMS where researchers might want to tailor context-sensitive interactions with study participants (eg, triggering a notification or survey) based on physical activity patterns, windows that are too short could generate frequent interactions with users, leading to notification fatigue and reduced compliance. Longer windows could perform well at certain times of the day when activities are fairly constant over long periods (eg, sedentary classroom time) but poorly during periods of high variability (eg, gym class and getting ready for school). 
A variable-sized sampling window approach with data-driven break points (at times when the activities may change) has the potential to improve HAR and improve the usability of platforms involving HAR.</p>
      </sec>
      <sec>
        <title>Time Series Segmentation</title>
        <p>Fixed-size sliding windows are 1 type of a larger class of segmentation methods in time series analysis. Segmentation methods divide a time series into segments having similar characteristics. Most segmentation algorithms can be framed in several ways: (1) producing the best representation using only a given number of segments, (2) producing the best representation such that the maximum error for any segment does not exceed the given threshold, or (3) producing the best representation such that the combined error of all segments is less than the given threshold [<xref ref-type="bibr" rid="ref14">14</xref>]. Multivariate segmentation methods segment multidimensional signals. Multivariate segmentation has been studied in several contexts using various approaches (each with different assumptions), including Bayesian change point detection [<xref ref-type="bibr" rid="ref15">15</xref>], hypothesis testing [<xref ref-type="bibr" rid="ref16">16</xref>], mixture models, hidden Markov models [<xref ref-type="bibr" rid="ref17">17</xref>], and convex segmentation [<xref ref-type="bibr" rid="ref18">18</xref>]. For this study, we selected a multivariate segmentation algorithm called greedy Gaussian segmentation (GGS) [<xref ref-type="bibr" rid="ref19">19</xref>], which is based on maximizing the likelihood of the data for a fixed number of segments. GGS assumes that in each segment, the mean and covariance are constant and independent of the means and covariances in all other segments. GGS is a scalable greedy algorithm and is applicable to solve much larger problems (in terms of vector dimension and time series length) than many of the other methods above.</p>
        <p>In this paper, we provide background on the GGS algorithm and perform a novel application of GGS to offline HAR, comparing GGS with the standard fixed-size sliding window approach. We use data from 2 HAR studies with different prescribed activity durations and different sensor wear modalities (waist-worn sensor and wrist-worn sensor). After processing the data using either segmentation approach, we used standard feature engineering and machine learning methods to predict activities and compared the accuracy of the 2 different segmentation approaches.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data</title>
        <p>The <italic>human activity recognition using smartphones</italic> (HARuS) dataset consists of 61 experiments conducted by 30 volunteers aged 19 to 48 years [<xref ref-type="bibr" rid="ref20">20</xref>]. Triaxial accelerometry and gyroscope data were collected at 50 Hz by a waist-worn smartphone (Samsung Galaxy S II). Each experiment was about 7 min long. In each experiment, the HARuS protocol scripted 12 ambulation activities, including 6 basic activities (each approximately 20 seconds in duration) and 6 postural transition activities (stand-to-sit, sit-to-stand, sit-to-lie, lie-to-sit, stand-to-lie, and lie-to-stand). The 6 basic activities include 3 static postures (standing, sitting, and lying) and 3 dynamic activities (walking, walking downstairs, and walking upstairs). The raw data were directly acquired from the smartphone readings, and the activities were labeled by manual review of video recordings of each experiment. To be consistent with previous studies [<xref ref-type="bibr" rid="ref11">11</xref>], we only modeled the 6 basic activities and deleted the 6 types of postural transition activity bouts and all unlabeled sessions, all of which were of relatively short duration and unlikely, for example, to be strongly associated with asthma exacerbation in studies using PRISMS [<xref ref-type="bibr" rid="ref5">5</xref>]. The dataset was divided into the first 55 experiments for training (2 experiments each for 26 people and 3 experiments for 1 participant) and 6 experiments (2 experiments each for 3 people) for holdout testing. The 6 raw signals of experiment 1 are plotted in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>The Los Angeles PRISMS Center <italic>BREATHE</italic> dataset [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>] was collected on 16 participants, aged 5 to 15 years, using the BREATHE Kit, an informatics platform designed to monitor multiple exposures, behaviors, and activities in context to identify personal triggers and predict the risk of pediatric asthma exacerbations in real time. Triaxial accelerometry and gyroscope data were collected at 10 Hz using a wrist-worn Motorola Moto 360 Sport smartwatch. Participants performed each of the 5 activities (standing, sitting, lying, walking, and walking on stairs) for 10 min and running for 5 min (to minimize discomfort). Unlike the HARuS dataset, participants were permitted to perform natural movements (especially free arm movement such as sitting while typing or using a smartphone) during each activity. The raw data were acquired as the end product of a data pipeline (from smartwatch to the BREATHE app on the smartphone via Bluetooth and then securely uploaded to the BREATHE servers wirelessly and in real time). For the BREATHE dataset, we modeled all 6 scripted activities: standing, sitting, lying, walking, walking on stairs (labels did not differentiate up and down stairs), and running. We used experiments from 14 of the 16 participants as 2 participants had substantial quantities of missing data. In the BREATHE dataset, data were saved as separate files for each activity, for each participant. 
To evaluate whether GGS segmentation improves prediction under a scenario of variable activity bout durations, we generated artificial activity data files for each participant by (1) randomly dividing his or her activity sessions (each about 10 min long) into 10 subsessions; then (2) randomly shuffling all subsessions (60 in total); and finally (3) concatenating all 60 subsessions into 1 data file, potentially resulting in fewer than 60 distinct activity bouts if bouts with identical activities are located next to each other. Hence, we produced 14 artificial activity files with artificial unequal activity bout durations, one for each of the 14 participants. The artificial dataset was divided into the first 12 participants for training and the last 2 participants for holdout testing. The 6 raw signals of experiment 1 are plotted in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
      <title>Workflow</title>
      <p><xref ref-type="fig" rid="figure1">Figure 1</xref> provides an overview of our workflow. For both datasets, the raw data were first preprocessed by applying a median filter (kernel size=3) to remove outliers. Afterwards, a Butterworth [<xref ref-type="bibr" rid="ref24">24</xref>] filter was used to remove artifacts and baseline wandering noise associated with the data acquisition process (eg, the constant force of gravity or shaking the device). Specifically, a third-order low-pass Butterworth filter was applied separately to each triaxial component (x, y, and z of the accelerometer and gyroscope). A power spectral density (PSD) was calculated and used to choose the cut-off frequency, over which the sensor signals were attenuated. PSD is a metric that estimates the distribution of power over frequency, and it has been widely implemented to evaluate filters of high-frequency with baseline-wandering noise [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
      <p>Subsequently, the data streams were temporally aligned. The sampling frequency observed in practice can be a result of practical constraints (eg, battery saving and restricted access by the software stack in mobile devices’ operating systems). Thus, observed data can be sampled irregularly, with mismatch between the 2 sensors. In the HARuS dataset, there were no mismatched time stamps (ie, time stamps existing for only 1 sensor) when we concatenated accelerometer and gyroscope readings according to their time stamps. However, the BREATHE dataset contained considerable mismatching, and neither the accelerometer nor the gyroscope data were collected at exactly 10 Hz. To align the 2 sensor readings, we first downscaled the raw data to 50 Hz by rounding their time stamps to the nearest 50 Hz sampling point, and then we applied a linear interpolation method.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>The workflow of the human activity recognition framework. GGS: greedy Gaussian segmentation.</p>
        </caption>
        <graphic xlink:href="mhealth_v7i2e11201_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>Specifically, we added (as necessary) records for all 50 Hz time stamps to both sensor data files and linearly interpolated missing sensor readings (approximately 80% because of the downscaling) based on the left 5 adjacent nonmissing values and the right 5 adjacent nonmissing values. In addition to the missing values caused by the mismatching time stamps, there were also a number of longer periods with missing values in the BREATHE dataset. After aligning the 2 sensors, we truncated time periods with more than 10 seconds of consecutive missing values.</p>
      <p>Data transformation was used to augment the original data (6 signals from 2 triaxial sensors) with additional transformed signals. Statistical features were later extracted from both the raw and transformed signals. Specifically, 8 new signals were generated: 6 derivatives with respect to time (1 for each of the 6 original signals) and 2 Euclidean norms (1 for the x-, y-, and z-axis of each sensor). Hence, a total of 14 signals were available (6 original measured signals and 8 new calculated signals).</p>
      <p>Time windows were generated using 2 approaches. First, multivariate segmentation on the 6 original signals produced windows of varying sizes, with break points selected using training data to reflect changes of the means and covariances of the raw signals (a detailed description follows). Second, for comparison, we created various sizes of nonoverlapping fixed-length sliding windows (4 sizes for HARuS dataset: 0.2 second, 0.8 second, 3 seconds, and 8 seconds; 6 sizes for BREATHE dataset: 0.2 second, 0.8 second, 3 seconds, 8 seconds, 12 seconds, and 40 seconds). Window sizes were chosen to include approximately the window size in the original HARuS study (2.56 seconds) [<xref ref-type="bibr" rid="ref20">20</xref>] and to reflect a wide enough range to include the optimum window size for both datasets.</p>
      <p>Within each set of windows, we extracted statistical features for input into a machine learning model. These statistical features were either based on time domain (the original time-based windows) or frequency domain (Fourier transformation of the original time-based windows). For each set of windows, we calculated a total of 168 features: 6 statistics (arithmetic mean, SD, median absolute deviation, minimum, maximum, and entropy) on 14 signals and on both the time and frequency domains (6 x 14 x 2=168).</p></sec>
      <sec>
        <title>Multivariate Segmentation</title>
        <p>A brief description of GGS [<xref ref-type="bibr" rid="ref19">19</xref>] is as follows. Consider a multivariate time series consisting of <italic>T</italic> time instants <italic>x</italic><sub><italic>1</italic> </sub>, <italic>x</italic><sub><italic>2</italic> </sub>,..., <italic>x</italic><sub><italic>T</italic> </sub> ∈ <italic>R</italic><sup><italic>m</italic> </sup>, where <italic>m</italic> is the number of features (ie, m=6 in our study). The time series need not be uniformly sampled in real time (see note in Discussion on the independence assumption). Given K break points <italic>b</italic><sub><italic>1</italic> </sub>,..., <italic>b</italic><sub><italic>K</italic> </sub> ∈ (<italic>1</italic>,..., <italic>T</italic>) between a starting point <italic>b</italic><sub><italic>0</italic> </sub>= <italic>1</italic> and an end point <italic>b</italic><sub><italic>K+1</italic> </sub>= <italic>T</italic>, we assume that <italic>x</italic><sub><italic>t</italic> </sub> ~ <italic>MVN</italic> (<italic>µ</italic><sub><italic>bi</italic> </sub>, <italic>Σ</italic><sub><italic>bi</italic> </sub>) ∀ <italic>t</italic> ∈ (<italic>b</italic><sub><italic>i</italic> </sub>,..., <italic>b</italic><sub><italic>i+1</italic> </sub>) ∀ <italic>i</italic> ∈ [<italic>0</italic>, <italic>K</italic>] and are independent samples, where <italic>µ</italic><sub><italic>bi</italic> </sub> and <italic>Σ</italic><sub><italic>bi</italic> </sub> denote the mean vector and covariance matrix of the multivariate normal distribution within the interval of (<italic>b</italic><sub><italic>i</italic> </sub>,.., <italic>b</italic><sub><italic>i+1</italic> </sub>). A GGS can be learned on the multivariate time series by fitting a greedy algorithm to maximize the covariance-regularized log-likelihood.</p>
        <p>In <xref ref-type="fig" rid="figure2">Figure 2</xref>, equation a, <italic>l</italic> (<italic>b</italic>, <italic>µ</italic>, <italic>Σ</italic>) denotes the log-likelihood before regularization, <italic>b</italic> denotes the vector of break points, <italic>µ</italic> denotes [<italic>µ</italic><sub><italic>b0</italic> </sub>,..., <italic>µ</italic><sub><italic>bK</italic> </sub>], <italic>Σ</italic> denotes [<italic>Σ</italic><sub><italic>b0</italic> </sub>,..., <italic>Σ</italic><sub><italic>bK</italic> </sub>], and <italic>λ ≥</italic> 0 is an <italic>a priori</italic> specified hyperparameter that controls the amount of regularization [<xref ref-type="bibr" rid="ref19">19</xref>]. The greedy heuristic algorithm follows a top-down subroutine of adding a new break point with the largest increase of <italic>Φ(b,µ,Σ)</italic> at each step until K break points have been added, and then in a bottom-up way adjusts the positions of all break points until no change of any 1 break point increases <italic>Φ(b,µ,Σ)</italic>. A curve of the covariance-regularized log-likelihood versus K can be used to select K for a given dataset.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Equations.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Gradient Boosted Trees Classification</title>
        <p>To achieve high accuracy using a scalable method, we predicted activity classes using Xgboost [<xref ref-type="bibr" rid="ref26">26</xref>], an implementation of a tree-based boosting widely used in machine learning challenges. For a given dataset (D) with <italic>n</italic> observations and <italic>p</italic> features (ie, <italic>p</italic>=168 in our analysis), <italic>D</italic>={(<italic>x</italic><sub><italic>i</italic> </sub> ∈ <italic>R</italic><sup><italic>p</italic> </sup>, <italic>y</italic><sub><italic>i</italic> </sub> ∈ <italic>R</italic>)} ∀ <italic>i</italic> ∈[<italic>1</italic>, <italic>n</italic>], Xgboost ensembles M trees denoted <italic>f</italic><sub><italic>m</italic> </sub> to predict the output <italic>y</italic><sub><italic>i</italic> </sub>.</p>
        <p>The model is trained in a greedy, additive manner starting from m=1 (<xref ref-type="fig" rid="figure2">Figure 2</xref>, equation b). Let ŷ<sub>i</sub><sup>m</sup><sup>−</sup><sup>1</sup> be the prediction of y<sub>i</sub> at the (m−1)<sup>th</sup> iteration. We add f<sub>m</sub> to minimize the objective J<sup>m</sup> (<xref ref-type="fig" rid="figure2">Figure 2</xref>, equation c) until satisfactory convergence between the prediction and the ground truth is reached, where j is a predefined differentiable convex loss function that measures the difference between the current prediction and the ground truth and Ω is a predefined regularization term that penalizes the complexity of the model to prevent overfitting.</p>
        <p>Xgboost has features that can outperform other implementations of tree-based boosting (eg, boosted trees in scikit-learn and generalized boosted regression model in R) such as (1) using an exact (or approximate, for large datasets) greedy algorithm to enumerate over all possible splits to find the best solution, (2) alleviating slow-downs using a cache-aware prefetching algorithm, and (3) enabling out-of-core computation by dividing the data into multiple blocks, each stored on disk, to use machine’s maximum resources (see <xref ref-type="fig" rid="figure2">Figure 2</xref>, equation c).</p>
        <p>For the HARuS and BREATHE datasets, we tuned and implemented an Xgboost model with <italic>m</italic>=200 trees and learning rate=0.1 (more specifications in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>) using <italic>p</italic>=168 features calculated on each segment (from fixed-sized windows or GGS) of the training data. Segment-specific predictions for the testing data were translated into instantaneous predictions to facilitate comparison across segmentation approaches. Final evaluations of accuracy were based on instantaneous predictions.</p>
        
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Confusion matrix of instantaneous predictions using greedy Gaussian segmentation from the 6 test experiments in the Human Activity Recognition using Smartphones dataset.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="100"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>True categories</td>
                <td colspan="6">Xgboost<sup>a</sup> predicted categories</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>W<sup>b</sup></td>
                <td>WU<sup>c</sup></td>
                <td>WD<sup>d</sup></td>
                <td>ST<sup>e</sup></td>
                <td>STD<sup>f</sup></td>
                <td>LY<sup>g</sup></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>W</td>
                <td>11238</td>
                <td>0</td>
                <td>935</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>100</td>
                <td>92.32</td>
              </tr>
              <tr valign="top">
                <td>WU</td>
                <td>0</td>
                <td>11070</td>
                <td>1297</td>
                <td>0</td>
                <td>12</td>
                <td>0</td>
                <td>99.61</td>
                <td>89.43</td>
              </tr>
              <tr valign="top">
                <td>WD</td>
                <td>0</td>
                <td>0</td>
                <td>11659</td>
                <td>0</td>
                <td>40</td>
                <td>0</td>
                <td>83.36</td>
                <td>99.66</td>
              </tr>
              <tr valign="top">
                <td>ST</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>11037</td>
                <td>2798</td>
                <td>0</td>
                <td>85.14</td>
                <td>79.78</td>
              </tr>
              <tr valign="top">
                <td>STD</td>
                <td>0</td>
                <td>24</td>
                <td>96</td>
                <td>1926</td>
                <td>12546</td>
                <td>0</td>
                <td>81.49</td>
                <td>85.98</td>
              </tr>
              <tr valign="top">
                <td>LY</td>
                <td>0</td>
                <td>19</td>
                <td>0</td>
                <td>0</td>
                <td>0</td>
                <td>15266</td>
                <td>100</td>
                <td>99.88</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Xgboost specification: base_score=0.5, booster=“gbtree,” colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=2, min_child_weight=1, missing=None, n_estimators=200, n_jobs=1, nthread=None, objective=“multi:softprob,” random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None, silent=True, subsample=1. Overall accuracy: 91.06%.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>W: walking.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>WU: walking upstairs.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>WD: walking downstairs.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>ST: sitting.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>STD: standing.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>LY: laying.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Confusion matrix of instantaneous predictions using greedy Gaussian segmentation from the 2 test experiments in the BREATHE dataset.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="100"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>True categories</td>
                <td colspan="6">Xgboost<sup>a</sup> predicted categories</td>
                <td>Recall (%)</td>
                <td>Precision (%)</td>
              </tr>
              <tr valign="top">
                <td><break/></td>
                <td>L<sup>b</sup></td>
                <td>R<sup>c</sup></td>
                <td>S<sup>d</sup></td>
                <td>STR<sup>e</sup></td>
                <td>STD<sup>f</sup></td>
                <td>WK<sup>g</sup></td>
                <td><break/></td>
                <td><break/></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>L</td>
                <td>38874</td>
                <td>166</td>
                <td>6920</td>
                <td>830</td>
                <td>3938</td>
                <td>0</td>
                <td>76.63</td>
                <td>68.76</td>
              </tr>
              <tr valign="top">
                <td>R</td>
                <td>1587</td>
                <td>31593</td>
                <td>0</td>
                <td>12402</td>
                <td>791</td>
                <td>11693</td>
                <td>54.41</td>
                <td>82.54</td>
              </tr>
              <tr valign="top">
                <td>S</td>
                <td>12483</td>
                <td>0</td>
                <td>38596</td>
                <td>864</td>
                <td>8030</td>
                <td>154</td>
                <td>64.19</td>
                <td>72.65</td>
              </tr>
              <tr valign="top">
                <td>STR</td>
                <td>559</td>
                <td>6505</td>
                <td>1929</td>
                <td>46751</td>
                <td>2320</td>
                <td>6156</td>
                <td>72.80</td>
                <td>71.17</td>
              </tr>
              <tr valign="top">
                <td>STD</td>
                <td>887</td>
                <td>0</td>
                <td>5127</td>
                <td>0</td>
                <td>54300</td>
                <td>77</td>
                <td>89.91</td>
                <td>72.05</td>
              </tr>
              <tr valign="top">
                <td>WK</td>
                <td>2146</td>
                <td>12</td>
                <td>555</td>
                <td>4846</td>
                <td>5976</td>
                <td>52455</td>
                <td>79.49</td>
                <td>74.37</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Xgboost specification: base_score=0.5, booster=“gbtree,” colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3, min_child_weight=1, missing=None, n_estimators=200, n_jobs=1, nthread=None, objective=“multi:softprob,” random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None, silent=True, subsample=1. Overall accuracy: 79.4%.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>L: lie.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>R: run.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>S: sit.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>STR: stair.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>STD: stand.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>WK: walk.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Human Activity Recognition Using Smartphones Dataset</title>
        <p>The PSD curves to determine the cut-off frequency of the Butterworth filter are displayed in <xref ref-type="fig" rid="figure3">Figure 3</xref>. All 6 PSD curves taper to 0 at higher frequencies, with largest values in the lower frequency range from 0 Hz to 5 Hz. There is little baseline wandering noise in high frequencies (&gt;10 Hz). For consistency with previous studies [<xref ref-type="bibr" rid="ref11">11</xref>], we chose 20 Hz as the cut-off frequency.</p>
        <p>For GGS in the HARuS training data, the total covariance-regularized log-likelihood elevated rapidly as K increased from 0 to an inflection point around 16, and then even less rapidly (<xref ref-type="fig" rid="figure4">Figure 4</xref>). To favor more detailed segmentation results and allow for some incorrectly identified break points, especially during noisy periods and the transitory periods, we conservatively selected 50 break points.</p>
        
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Triaxial (x, y, and z) power spectral density curves of accelerometer (top row) and gyroscope (bottom row) of the human activity recognition using smartphones training dataset. ACC: accelerometer; Gyro: gyroscope; PSD: power spectral density.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Total covariance-regularized log-likelihood curve of the human activity recognition using smartphones training dataset.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Multivariate segmentation break points (K=50) displayed using vertical dashed lines on the time series of x-axis accelerometer readings from experiment 1 in the human activity recognition using smartphones training dataset.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        
        <p>As shown in <xref ref-type="fig" rid="figure5">Figure 5</xref> for experiment 1, the 13 bouts of the 6 nontransitory activities were generally well separated by the 50 break points. For this experiment, the first bout of sitting and the second bout of laying were both relatively noisy, and erroneous break points were created within these sessions.</p>
        <p>We trained an Xgboost model (<xref ref-type="fig" rid="figure6">Figure 6</xref>), a support vector machine (SVM) model using a radial basis function kernel and a random forest model using the segmented data. The instantaneous accuracy rate of the Xgboost model using GGS in the 6 holdout experiments was 91.06% (<xref ref-type="table" rid="table1">Table 1</xref>). This result is higher than the 89.3% accuracy reported in the original HARuS study on the same set of 6 activities [<xref ref-type="bibr" rid="ref11">11</xref>], and it also should be noted that their accuracy was calculated using sliding window predictions and not instantaneous predictions. Had we calculated accuracy using segment-level predictions, our accuracy would have been 95.96%. When activities were misclassified, they tended to be misclassified as other similar energy activities (<xref ref-type="table" rid="table1">Table 1</xref>). For example, sitting was most frequently misclassified as standing. The results of the SVM model and the random forest model are summarized in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>In comparison, the instantaneous accuracy of Xgboost models fitted using fixed-width sliding windows was highest for the 0.8-second window (91.79%), as shown in <xref ref-type="fig" rid="figure7">Figure 7</xref>. This <italic>optimal</italic> window size is smaller than the one used in the original HARuS paper (2.56 seconds) [<xref ref-type="bibr" rid="ref20">20</xref>]. As might be expected from experiments designed to have equally sized activity bouts, the 0.8-second fixed-size sliding window accuracy was slightly higher than that from GGS (91.06%). In the HARuS data, predictions were relatively stable, with some additional variability for the smallest size sliding windows (<xref ref-type="fig" rid="figure8">Figure 8</xref>). The 3 most important features from Xgboost using GGS were the segment-specific mean, minimum of the x-axis of the accelerometer, and the mean of the x-axis of the gyroscope (<xref ref-type="fig" rid="figure9">Figure 9</xref>).</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Instantaneous predictions using greedy Gaussian segmentation (top row) and ground truth (bottom row) from the 6 test experiments in the human activity recognition using smartphones dataset.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Accuracy of instantaneous predictions using 4 different fixed-size sliding windows (SWs) in the 6 test experiments in the human activity recognition using smartphones dataset. The horizontal dashed line represents the accuracy using greedy Gaussian segmentation. GGS: greedy Gaussian segmentation.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Predictions using 4 different fixed-sized sliding windows (SWs) and greedy Gaussian segmentation, as well as the ground truth for the 6 test experiments in the human activity recognition using smartphones dataset. GGS: greedy Gaussian segmentation; SW: sliding window.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Importance of the top 15 features from Xgboost using greedy Gaussian segmentation from the human activity recognition using smartphones dataset. Abbreviations in the feature names are standard deviation (std), minimum (min), maximum (max), mean absolute deviation (mad), Euclidean magnitude (norm), and derivative (jerk). The operators in the names should be read in the order of from the right to the left. For example, acc_x_jerk_max means the maximum value of the derivative values on the x-axis of the accelerometer sensor. Acc: accelerometer; Gyro: gyroscope.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>BREATHE Dataset</title>
        <p>On the basis of the PSD plots of the training data (<xref ref-type="fig" rid="figure10">Figure 10</xref>), we again chose 20 Hz as the cut-off frequency for the Butterworth filter. The gyroscope energies are in the same scale as the HARuS dataset; however, the accelerometer readings have much larger amplitudes, which makes the curves look smoother in the range of approximately 5 Hz. The zoom-in windows in the accelerometer’s 3 subplots show the variations of the PSD curves in the range from 2.5 Hz to 7.5 Hz on a similar scale to that used in the PSD plots for the HARuS data.</p>
        <p>The covariance-regularized log-likelihood curve for the 12 training experiments in the BREATHE dataset (<xref ref-type="fig" rid="figure11">Figure 11</xref>) had one inflection point at approximately K=60 but no clear second inflection point (through K=300) as we had observed in the HARuS dataset. Interestingly, there were, by design, approximately 60 activity bouts in each BREATHE experiment, demonstrating that GGS again identified the number of different activity bouts. We arbitrarily chose K=100 break points for multivariate segmentation as it was a round number larger than the most obvious inflection point. From <xref ref-type="fig" rid="figure12">Figure 12</xref>, it appears that 100 was an adequate number of break points. A choice of 60 break points would have been inadequate to segment approximately 60 bouts as some noisier bouts were erroneously partitioned into multiple segments.</p>
        <p>Similar to the HARuS dataset, 3 models were trained: Xgboost, SVM, and random forest. As evident from <xref ref-type="fig" rid="figure13">Figure 13</xref>, the predictive accuracy for certain activities varied across participants (eg, the accuracy for running was 71.5% for the participant in experiment 13 and 74.4% for the participant in experiment 14). Similar to the HARuS results, most misclassified records were shuffled either within the active group (walk, stair, and run) or the inactive group (sit, lie, and stand). If the activities had been grouped into active or inactive, the instantaneous accuracy rate would have been 95.0%. The results of the SVM model and the random forest model are shown in <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>. The instantaneous accuracy rate of the Xgboost model using GGS was 79.4% (<xref ref-type="table" rid="table2">Table 2</xref> and <xref ref-type="fig" rid="figure14">Figure 14</xref>).</p>
        <p>The accuracies of Xgboost from the 4 smallest fixed-size sliding windows (the same sizes as used in the HARuS dataset) increased monotonically. To achieve the inverted U-shaped curve indicating that we obtained the optimum window size, we included 2 additional window sizes. The highest accuracy was achieved for the 8-second window (72.7%) as shown in <xref ref-type="fig" rid="figure14">Figure 14</xref>. As expected in this dataset with activity bouts of unequal duration, the <italic>smart-sized</italic> GGS segmentation (79.4% accuracy) considerably outperformed the fixed-size sliding windows. Not only was GGS more accurate but it also produced considerably less noisy predictions as shown in <xref ref-type="fig" rid="figure15">Figure 15</xref>. The 2 most important features from Xgboost using GGS were segment specific: mean z-axis and the minimum norm of the triaxial accelerometer signal (<xref ref-type="fig" rid="figure16">Figure 16</xref>).</p>
        <fig id="figure10" position="float">
          <label>Figure 10</label>
          <caption>
            <p>Triaxial (x, y, and z) power spectral density curves of accelerometer (upper 3 subplots) and gyroscope (lower 3 subplots) of the BREATHE training dataset. ACC: accelerometer; Gyro: gyroscope; PSD: power spectral density.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure11" position="float">
          <label>Figure 11</label>
          <caption>
            <p>Total covariance-regularized log-likelihood curve of the BREATHE training dataset.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure12" position="float">
          <label>Figure 12</label>
          <caption>
            <p>Multivariate segmentation break points (K=100) displayed using vertical dashed lines on the time series of x-axis accelerometer readings from experiment 1 in the BREATHE training dataset.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure13" position="float">
          <label>Figure 13</label>
          <caption>
            <p>Instantaneous predictions using greedy Gaussian segmentation (top) and ground truth (bottom) from the 2 test experiments (13 and 14) in the BREATHE dataset.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        
        <fig id="figure14" position="float">
          <label>Figure 14</label>
          <caption>
            <p>Accuracy of instantaneous predictions from Xgboost using 6 different fixed-size sliding windows (SWs) in the 2 test experiments in the BREATHE dataset. The horizontal dashed line represents the accuracy from Xgboost with greedy Gaussian segmentation. SW: sliding window; GGS: greedy Gaussian segmentation.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure15" position="float">
          <label>Figure 15</label>
          <caption>
            <p>Predictions from Xgboost using 6 different fixed-sized sliding windows (SWs) and greedy Gaussian segmentation as well as the ground truth for experiment 13 of the BREATHE test data. SW: sliding window; GGS: greedy Gaussian segmentation.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure16" position="float">
          <label>Figure 16</label>
          <caption>
            <p>Importance of the top 15 features from Xgboost using greedy Gaussian segmentation from the BREATHE dataset. Abbreviations in the feature names are SD, minimum (min), maximum (max), mean absolute deviation (mad), Euclidean magnitude (norm), and derivative (jerk). The operators in the names should be read in the order of from the right to the left. For example, acc_x_jerk_max means the maximum value of the derivative values on the x-axis of the accelerometer sensor.</p>
          </caption>
          <graphic xlink:href="mhealth_v7i2e11201_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Summary of Findings</title>
        <p>We found that Xgboost using GGS outperformed Xgboost using fixed-size sliding windows in a dataset with unequal activity bout durations (BREATHE), by producing more accurate and considerably more stable predictions. When implemented in a platform such as PRISMS, GGS should be able to identify short bursts of activity while still producing relatively smooth predictions. Identification of short activity bouts is particularly important for appropriately quantifying vigorous activity in children [<xref ref-type="bibr" rid="ref27">27</xref>]. Noisy predictions from fixed-size sliding windows might need to be smoothed by pooling (ie, majority vote) for improved face validity of reported activity classifications and to avoid triggering excessive user notifications. Note that we presented our results using instantaneous predictions—to allow for comparisons across segmentation methods—that resulted in slightly lower accuracy than previous studies presenting segment-level predictions. In practice, segment-level predictions are typically used.</p>
        <p>Major differences between the HARuS and BREATHE datasets included not only activity bout duration (equal vs unequal), participant ages (adults vs children), and experimental protocol (tightly prescribed activities vs activities allowing for more natural movements) but also how the sensors were worn. This difference in wear location is likely the cause of the differences between the most important features in the Xgboost models. The axes of a device (smartwatch or smartphone) are typically labeled as x, denoting the side-to-side dimension; y, denoting the forward and backward dimension; and z, denoting the up and down dimension. Incorporating these axes with the wearing position of the 2 datasets, forward movement would correspond to signal along the x-axis for HARuS participants and the z-axis (slightly deviated to x-axis) for BREATHE participants. For both datasets, the most important features appeared to be related to forward motion (x-axis for the HARuS data and z-axis or combination of axes, ie, the norm for the BREATHE data) and the direction perpendicular to this motion (eg, mean values of the y-axis of the accelerometer, acc_y_mean, which had the third highest score in the HARuS data and the fourth highest score in the BREATHE data).</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>In this study, the models were trained using a <italic>clip-independent</italic> method. Time dependency is more obvious in datasets with temporal context, and many studies have applied the hidden Markov model (HMM) to such datasets as motion videos or images [<xref ref-type="bibr" rid="ref28">28</xref>], body markers [<xref ref-type="bibr" rid="ref29">29</xref>], and so on. For pure waist- or wrist-worn accelerometers or gyroscopes, the signals do not have as strong a time dependency as those temporal context data. Second, to compare the <italic>time-dependent</italic> methods, HMM should be tested against other analogous methods such as long short-term memory (LSTM), but not GGS. GGS is a way to clip the data, as is the <italic>fixed-length</italic> sliding window. We can either apply a <italic>clip-independent</italic> method as in this study or apply HMM or LSTM to test the time dependency among those clips.</p>
        <p>The major weaknesses of the GGS approach are computational load and space requirements. To deploy GGS on streaming data, we would need to maintain a much larger cache memory of the latest received streaming data in comparison with the traditional fixed-length sliding window methods. GGS also requires time series of continuous features. However, sensor data (such as accelerometer and gyroscope) are typically quantitative, so this requirement is reasonable. Furthermore, missing values need to be either removed or interpolated. As for scalability, GGS has a runtime complexity of O(KTn<sup>3</sup>) in the normal mode and O(Tn<sup>3</sup>) in a <italic>warm start</italic> mode, in which the algorithm directly starts with a random set of K breaking points. Fixed-size sliding window approaches have better runtime complexity of O(n). Thus, the greedy heuristic needs to be improved in our future study. However, as the number of segments (K) is generally much smaller than the optimum number of fixed-size windows, GGS could substantially reduce the computational load in the subsequent feature engineering, especially when a tremendous number of features are to be extracted. Statistically, the GGS algorithm assumes that the multivariate time series can be described as independent samples from a multivariate Gaussian distribution within each segment. Time series data typically display autocorrelation, which would violate the independence assumption, especially when the breaking points are not sufficient to separate the autocorrelated parts into different segments.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Identification of the break points that signify changes in physical activity plays an important role in quantifying HAR. In platforms such as PRISMS, HAR can be used not only to quantify the total duration of time in, for example, light, moderate, or vigorous activity but also to trigger user notifications or alerts or provide real-time feedback on activity. Our GGS-based approach shows great potential in variable activity bout duration scenarios and produces less variable predictions that should minimize unnecessary interactions with the user. However, computational and implementation limitations exist. Interesting future work will be focused on deploying GGS in real-time data streams and, more generally, finding heterogeneous segments when introducing additional sensor signals measured at different frequencies and on different scales (eg, sensors for physiological signals such as heart rate).</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <app id="app1">
        <title>Multimedia Appendix 1</title>
        <p>Supplemental tables and figures.</p>
        <media xlink:href="mhealth_v7i2e11201_app1.pdf" xlink:title="PDF File (Adobe PDF File), 1MB"/>
      </app>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">GGS</term>
          <def>
            <p>greedy Gaussian segmentation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">HAR</term>
          <def>
            <p>human activity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HARuS</term>
          <def>
            <p>human activity recognition using smartphones</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HMM</term>
          <def>
            <p>hidden Markov model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PRISMS</term>
          <def>
            <p>Pediatric Research using Integrated Sensor Monitoring Systems</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PSD</term>
          <def>
            <p>power spectral density</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank the participants of both studies and all the staff who made data collection possible, particularly Lisa Valencia. This study was supported by the National Institute of Biomedical Imaging and Bioengineering (grants U24EB021996, U54EB022002) and the National Institute of Environmental Health Sciences Center (grant 5P30ES07048).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Haskell</surname>
            <given-names>WL</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>IM</given-names>
          </name>
          <name name-style="western">
            <surname>Pate</surname>
            <given-names>RR</given-names>
          </name>
          <name name-style="western">
            <surname>Powell</surname>
            <given-names>KE</given-names>
          </name>
          <name name-style="western">
            <surname>Blair</surname>
            <given-names>SN</given-names>
          </name>
          <name name-style="western">
            <surname>Franklin</surname>
            <given-names>BA</given-names>
          </name>
          <name name-style="western">
            <surname>Macera</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Heath</surname>
            <given-names>GW</given-names>
          </name>
          <name name-style="western">
            <surname>Thompson</surname>
            <given-names>PD</given-names>
          </name>
          <name name-style="western">
            <surname>Bauman</surname>
            <given-names>A</given-names>
          </name>
          <collab>American College of Sports Medicine</collab>
          <collab>American Heart Association</collab>
        </person-group>
        <article-title>Physical activity and public health: updated recommendation for adults from the American College of Sports Medicine and the American Heart Association</article-title>
        <source>Circulation</source>  
        <year>2007</year>  
        <month>08</month>  
        <day>28</day>  
        <volume>116</volume>  
        <issue>9</issue>  
        <fpage>1081</fpage>  
        <lpage>93</lpage>  
        <pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.107.185649</pub-id>
        <pub-id pub-id-type="medline">17671237</pub-id>
        <pub-id pub-id-type="pii">CIRCULATIONAHA.107.185649</pub-id></nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Klepeis</surname>
            <given-names>NE</given-names>
          </name>
          <name name-style="western">
            <surname>Nelson</surname>
            <given-names>WC</given-names>
          </name>
          <name name-style="western">
            <surname>Ott</surname>
            <given-names>WR</given-names>
          </name>
          <name name-style="western">
            <surname>Robinson</surname>
            <given-names>JP</given-names>
          </name>
          <name name-style="western">
            <surname>Tsang</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Switzer</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Behar</surname>
            <given-names>JV</given-names>
          </name>
          <name name-style="western">
            <surname>Hern</surname>
            <given-names>SC</given-names>
          </name>
          <name name-style="western">
            <surname>Engelmann</surname>
            <given-names>WH</given-names>
          </name>
        </person-group>
        <article-title>The National Human Activity Pattern Survey (NHAPS): a resource for assessing exposure to environmental pollutants</article-title>
        <source>J Expo Anal Environ Epidemiol</source>  
        <year>2001</year>  
        <volume>11</volume>  
        <fpage>231</fpage>  
        <lpage>52</lpage>  
        <pub-id pub-id-type="doi">10.1038/sj.jea.7500165</pub-id>
        <pub-id pub-id-type="medline">11477521</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Perry</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Herrington</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Goldsack</surname>
            <given-names>JC</given-names>
          </name>
          <name name-style="western">
            <surname>Grandinetti</surname>
            <given-names>CA</given-names>
          </name>
          <name name-style="western">
            <surname>Vasisht</surname>
            <given-names>KP</given-names>
          </name>
          <name name-style="western">
            <surname>Landray</surname>
            <given-names>MJ</given-names>
          </name>
          <name name-style="western">
            <surname>Bataille</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>DiCicco</surname>
            <given-names>RA</given-names>
          </name>
          <name name-style="western">
            <surname>Bradley</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Narayan</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Papadopoulos</surname>
            <given-names>EJ</given-names>
          </name>
          <name name-style="western">
            <surname>Sheth</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Skodacek</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Stem</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Strong</surname>
            <given-names>TV</given-names>
          </name>
          <name name-style="western">
            <surname>Walton</surname>
            <given-names>MK</given-names>
          </name>
          <name name-style="western">
            <surname>Corneli</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Use of mobile devices to measure outcomes in clinical research, 2010-2016: a systematic literature review</article-title>
        <source>Digit Biomark</source>  
        <year>2018</year>  
        <volume>2</volume>  
        <issue>1</issue>  
        <fpage>11</fpage>  
        <lpage>30</lpage>  
        <pub-id pub-id-type="doi">10.1159/000486347</pub-id>
        <pub-id pub-id-type="medline">29938250</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Murray</surname>
            <given-names>CS</given-names>
          </name>
          <name name-style="western">
            <surname>Poletti</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Kebadze</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Morris</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Woodcock</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Johnston</surname>
            <given-names>SL</given-names>
          </name>
          <name name-style="western">
            <surname>Custovic</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Study of modifiable risk factors for asthma exacerbations: virus infection and allergen exposure increase the risk of asthma hospital admissions in children</article-title>
        <source>Thorax</source>  
        <year>2006</year>  
        <month>05</month>  
        <volume>61</volume>  
        <issue>5</issue>  
        <fpage>376</fpage>  
        <lpage>82</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16384881"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/thx.2005.042523</pub-id>
        <pub-id pub-id-type="medline">16384881</pub-id>
        <pub-id pub-id-type="pii">thx.2005.042523</pub-id>
        <pub-id pub-id-type="pmcid">PMC2111190</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lucas</surname>
            <given-names>SR</given-names>
          </name>
          <name name-style="western">
            <surname>Platts-Mills</surname>
            <given-names>TA</given-names>
          </name>
        </person-group>
        <article-title>Physical activity and exercise in asthma: relevance to etiology and treatment</article-title>
        <source>J Allergy Clin Immunol</source>  
        <year>2005</year>  
        <month>05</month>  
        <volume>115</volume>  
        <issue>5</issue>  
        <fpage>928</fpage>  
        <lpage>34</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jaci.2005.01.033</pub-id>
        <pub-id pub-id-type="medline">15867847</pub-id>
        <pub-id pub-id-type="pii">S0091674905001557</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cochrane</surname>
            <given-names>LM</given-names>
          </name>
          <name name-style="western">
            <surname>Clark</surname>
            <given-names>CJ</given-names>
          </name>
        </person-group>
        <article-title>Benefits and problems of a physical training programme for asthmatic patients</article-title>
        <source>Thorax</source>  
        <year>1990</year>  
        <month>05</month>  
        <volume>45</volume>  
        <issue>5</issue>  
        <fpage>345</fpage>  
        <lpage>51</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://thorax.bmj.com/cgi/pmidlookup?view=long&amp;pmid=2116678"/>
        </comment>  
        <pub-id pub-id-type="medline">2116678</pub-id>
        <pub-id pub-id-type="pmcid">PMC462468</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Eijkemans</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Mommers</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Draaisma</surname>
            <given-names>JM</given-names>
          </name>
          <name name-style="western">
            <surname>Thijs</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Prins</surname>
            <given-names>MH</given-names>
          </name>
        </person-group>
        <article-title>Physical activity and asthma: a systematic review and meta-analysis</article-title>
        <source>PLoS One</source>  
        <year>2012</year>  
        <volume>7</volume>  
        <issue>12</issue>  
        <fpage>e50775</fpage>  
        <pub-id pub-id-type="doi">10.1371/journal.pone.0050775</pub-id>
        <pub-id pub-id-type="medline">23284646</pub-id>
        <pub-id pub-id-type="pii">e50775</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Robinson</surname>
            <given-names>DM</given-names>
          </name>
          <name name-style="western">
            <surname>Egglestone</surname>
            <given-names>DM</given-names>
          </name>
          <name name-style="western">
            <surname>Hill</surname>
            <given-names>PM</given-names>
          </name>
          <name name-style="western">
            <surname>Rea</surname>
            <given-names>HH</given-names>
          </name>
          <name name-style="western">
            <surname>Richards</surname>
            <given-names>GN</given-names>
          </name>
          <name name-style="western">
            <surname>Robinson</surname>
            <given-names>SM</given-names>
          </name>
        </person-group>
        <article-title>Effects of a physical conditioning programme on asthmatic patients</article-title>
        <source>N Z Med J</source>  
        <year>1992</year>  
        <month>07</month>  
        <day>08</day>  
        <volume>105</volume>  
        <issue>937</issue>  
        <fpage>253</fpage>  
        <lpage>6</lpage>  
        <pub-id pub-id-type="medline">1620508</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Khan</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>YK</given-names>
          </name>
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>TS</given-names>
          </name>
        </person-group>
        <article-title>Accelerometer signal-based human activity recognition using augmented autoregressive model coefficients and artificial neural nets</article-title>
        <year>2008</year>  
        <month>08</month>  
        <day>20</day>  
        <conf-name>30th Annual International Conference of the IEEE Engineering in Medicine and Biology Society</conf-name>
        <conf-date>August 20-25, 2008</conf-date>
        <conf-loc>Vancouver, BC, Canada</conf-loc>
        <fpage>5173</fpage>  
        <lpage>5</lpage>  
        <pub-id pub-id-type="doi">10.1109/IEMBS.2008.4650379</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hache</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Lemaire</surname>
            <given-names>ED</given-names>
          </name>
          <name name-style="western">
            <surname>Baddour</surname>
            <given-names>N</given-names>
          </name>
        </person-group>
        <article-title>Wearable mobility monitoring using a multimedia smartphone platform</article-title>
        <source>IEEE Trans Instrum Meas</source>  
        <year>2011</year>  
        <volume>60</volume>  
        <issue>9</issue>  
        <fpage>3153</fpage>  
        <lpage>61</lpage>  
        <pub-id pub-id-type="doi">10.1109/TIM.2011.2122490</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Anguita</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ghio</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Oneto</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Parra</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Reyes-Ortiz</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Human Activity Recognition on Smartphones Using a Multiclass Hardware-Friendly Support Vector Machine</article-title>
        <year>2012</year>  
        <conf-name>International Workshop on Ambient Assisted Living 2012</conf-name>
        <conf-date>December 3-5, 2012</conf-date>
        <conf-loc>Vitoria-Gasteiz, Spain</conf-loc>
        <fpage>216</fpage>  
        <lpage>23</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.icephd.org/sites/default/files/IWAAL2012.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lara</surname>
            <given-names>OD</given-names>
          </name>
          <name name-style="western">
            <surname>Labrador</surname>
            <given-names>MA</given-names>
          </name>
        </person-group>
        <article-title>A survey on human activity recognition using wearable sensors</article-title>
        <source>IEEE Commun Surv Tutor</source>  
        <year>2013</year>  
        <volume>15</volume>  
        <issue>3</issue>  
        <fpage>1192</fpage>  
        <lpage>209</lpage>  
        <pub-id pub-id-type="doi">10.1109/SURV.2012.110112.00192</pub-id></nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Torres-Huitzil</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Alvarez-Landero</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Adibi</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Accelerometer-Based Human Activity Recognition in Smartphones for Healthcare Services</article-title>
        <source>Mobile Health - A Technology Road Map</source>  
        <year>2015</year>  
        <publisher-loc>Cham, Switzerland</publisher-loc>
        <publisher-name>Springer International Publishing</publisher-name>
        <fpage>147</fpage>  
        <lpage>69</lpage> </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Keogh</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Chu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hart</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Pazzani</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <person-group person-group-type="editor">
          <name name-style="western">
            <surname>Last</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Kandel</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Bunke</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Segmenting time series: a survey and novel approach</article-title>
        <source>Data Mining in Time Series Databases</source>  
        <year>2004</year>  
        <publisher-loc>Singapore</publisher-loc>
        <publisher-name>World Scientific Publishing Co Pte Ltd</publisher-name>
        <fpage>1</fpage>  
        <lpage>21</lpage> </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Booth</surname>
            <given-names>NB</given-names>
          </name>
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>AF</given-names>
          </name>
        </person-group>
        <article-title>A Bayesian approach to retrospective identification of change-points</article-title>
        <source>J Econom</source>  
        <year>1982</year>  
        <volume>19</volume>  
        <issue>1</issue>  
        <fpage>7</fpage>  
        <lpage>22</lpage>  
        <pub-id pub-id-type="doi">10.1016/0304-4076(82)90048-3</pub-id></nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Galeano</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Wied</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Multiple break detection in the correlation structure of random variables</article-title>
        <source>Comput Stat Data Anal</source>  
        <year>2014</year>  
        <volume>76</volume>  
        <fpage>262</fpage>  
        <lpage>82</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.csda.2013.02.031</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nystrup</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Madsen</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Lindstrom</surname>
            <given-names>E</given-names>
          </name>
        </person-group>
        <article-title>Long memory of financial time series and hidden Markov models with time-varying parameters</article-title>
        <source>J Forecast</source>  
        <year>2017</year>  
        <volume>36</volume>  
        <issue>8</issue>  
        <fpage>989</fpage>  
        <lpage>1002</lpage>  
        <pub-id pub-id-type="doi">10.1002/for.2447</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Katz</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Crammer</surname>
            <given-names>K</given-names>
          </name>
        </person-group>
        <source>arXiv e-prints</source>  
        <access-date>2018-12-03</access-date>
        <comment>Outlier-Robust Convex Segmentation 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://adsabs.harvard.edu/abs/2014arXiv1411.4503K">http://adsabs.harvard.edu/abs/2014arXiv1411.4503K</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="73utxwLSR"/></comment> </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hallac</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Nystrup</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Boyd</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <source>arXiv e-prints</source>  
        <access-date>2018-12-03</access-date>
        <comment>Greedy Gaussian Segmentation of Multivariate Time Series 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://adsabs.harvard.edu/abs/2016arXiv161007435H">http://adsabs.harvard.edu/abs/2016arXiv161007435H</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="73uu9z7Al"/></comment> </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Anguita</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Ghio</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Oneto</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Parra</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Reyes-Ortiz</surname>
            <given-names>JL</given-names>
          </name>
        </person-group>
        <article-title>A Public Domain Dataset for Human Activity Recognition using Smartphones</article-title>
        <source>ESANN 2013 proceedings</source>  
        <year>2013</year>  
        <conf-name>21st European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning</conf-name>
        <conf-date>April 24-26, 2013</conf-date>
        <conf-loc>Belgium</conf-loc>
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2013-84.pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hosseini</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Fazeli</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>van Vliet</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Valencia</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Habre</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Sarrafzadeh</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Bui</surname>
            <given-names>AA</given-names>
          </name>
        </person-group>
        <article-title>Children Activity Recognition: Challenges and Strategies</article-title>
        <source>Conf Proc IEEE Eng Med Biol Soc</source>  
        <year>2018</year>  
        <month>07</month>  
        <conf-name>Annual International Conference of the IEEE Engineering in Medicine and Biology Society</conf-name>
        <conf-date>July 17-21, 2018</conf-date>
        <conf-loc>Honolulu</conf-loc>
        <fpage>4331</fpage>  
        <lpage>4334</lpage>  
        <pub-id pub-id-type="doi">10.1109/EMBC.2018.8513320</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hosseini</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Buonocore</surname>
            <given-names>CM</given-names>
          </name>
          <name name-style="western">
            <surname>Hashemzadeh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hojaiji</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Kalantarian</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Sideris</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Bui</surname>
            <given-names>AA</given-names>
          </name>
          <name name-style="western">
            <surname>King</surname>
            <given-names>CE</given-names>
          </name>
          <name name-style="western">
            <surname>Sarrafzadeh</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Feasibility of a secure wireless sensing smartwatch application for the self-management of pediatric asthma</article-title>
        <source>Sensors (Basel)</source>  
        <year>2017</year>  
        <volume>17</volume>  
        <issue>8</issue>  
        <fpage>E1780</fpage>  
        <pub-id pub-id-type="doi">10.3390/s17081780</pub-id>
        <pub-id pub-id-type="medline">28771168</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hosseini</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Buonocore</surname>
            <given-names>CM</given-names>
          </name>
          <name name-style="western">
            <surname>Hashemzadeh</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Hojaiji</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Kalantarian</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Sideris</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Bui</surname>
            <given-names>AA</given-names>
          </name>
          <name name-style="western">
            <surname>King</surname>
            <given-names>CE</given-names>
          </name>
          <name name-style="western">
            <surname>Sarrafzadeh</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>HIPAA compliant wireless sensing smartwatch application for the self-management of pediatric asthma</article-title>
        <year>2016</year>  
        <month>06</month>  
        <day>14</day>  
        <conf-name>13th International Conference on Wearable and Implantable Body Sensor Networks (BSN)</conf-name>
        <conf-date>June 14-17, 2016</conf-date>
        <conf-loc>San Francisco</conf-loc>
        <fpage>49</fpage>  
        <lpage>54</lpage>  
        <pub-id pub-id-type="doi">10.1109/BSN.2016.7516231</pub-id></nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Selesnick</surname>
            <given-names>IW</given-names>
          </name>
          <name name-style="western">
            <surname>Burrus</surname>
            <given-names>CS</given-names>
          </name>
        </person-group>
        <article-title>Generalized digital Butterworth filter design</article-title>
        <source>IEEE Trans Signal Process</source>  
        <year>1998</year>  
        <volume>46</volume>  
        <issue>6</issue>  
        <fpage>1688</fpage>  
        <lpage>94</lpage>  
        <pub-id pub-id-type="doi">10.1109/78.678493</pub-id></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kaur</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Comparison of different approaches for removal of baseline wander from ECG signal</article-title>
        <source>Proceedings of the International Conference &amp; Workshop on Emerging Trends in Technology</source>  
        <year>2011</year>  
        <conf-name>ICWET'11</conf-name>
        <conf-date>February 25-26, 2011</conf-date>
        <conf-loc>Mumbai, Maharashtra, India</conf-loc>
        <pub-id pub-id-type="doi">10.1145/1980022.1980307</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Guestrin</surname>
            <given-names>C</given-names>
          </name>
        </person-group>
        <source>arXiv e-prints</source>  
        <year>2016</year>  
        <access-date>2018-12-01</access-date>
        <comment>XGBoost: A Scalable Tree Boosting System 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://adsabs.harvard.edu/abs/2016arXiv160302754C">http://adsabs.harvard.edu/abs/2016arXiv160302754C</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="73uuGAveZ"/></comment> </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Rowlands</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Powell</surname>
            <given-names>SM</given-names>
          </name>
          <name name-style="western">
            <surname>Humphries</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Eston</surname>
            <given-names>RG</given-names>
          </name>
        </person-group>
        <article-title>The effect of accelerometer epoch on physical activity output measures</article-title>
        <source>J Exerc Sci Fit</source>  
        <year>2006</year>  
        <volume>4</volume>  
        <issue>1</issue>  
        <fpage>52</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.579.6730&amp;rep=rep1&amp;type=pdf"/>
        </comment> </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Qu</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Q</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>A new method for human action recognition: Discrete HMM with improved LBG algorithm</article-title>
        <year>2015</year>  
        <month>09</month>  
        <day>25</day>  
        <conf-name>9th International Conference on Anti-counterfeiting, Security, and Identification</conf-name>
        <conf-date>September 25-27, 2015</conf-date>
        <conf-loc>Xiamen, China</conf-loc>
        <pub-id pub-id-type="doi">10.1109/ICASID.2015.7405672</pub-id></nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Gedat</surname>
            <given-names>E</given-names>
          </name>
          <name name-style="western">
            <surname>Fechner</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Fiebelkorn</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Vandenhouten</surname>
            <given-names>R</given-names>
          </name>
        </person-group>
        <article-title>Human action recognition with hidden Markov models and neural network derived poses</article-title>
        <year>2017</year>  
        <month>09</month>  
        <day>14</day>  
        <conf-name>15th International Symposium on Intelligent Systems and Informatics</conf-name>
        <conf-date>September 14-16, 2017</conf-date>
        <conf-loc>Subotica, Serbia</conf-loc>
        <pub-id pub-id-type="doi">10.1109/SISY.2017.8080544</pub-id></nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
