% Conference paper (SPIE proceedings, 2022): training-data selection for event
% classification, applied to power-level prediction at the High Flux Isotope Reactor.
% Field values are kept verbatim from the exported record; only layout and delimiters differ.
@inproceedings{9fd234eb600848cc8ab06ff6c8352a68,
  author    = {Anand Iyer and Garrison Flynn and Nidhi Parikh and Daniel Archer and Thomas Karnowski and Monica Maceira and Omar Marcillo and Andrew Nicholson and Will Ray and Randall Wetherington and Michael Willis},
  title     = {Training data selection for event classification in a highly variable environment},
  booktitle = {Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications IV},
  editor    = {Tien Pham and Latasha Solomon},
  series    = {Proceedings of SPIE - The International Society for Optical Engineering},
  publisher = {SPIE},
  year      = {2022},
  doi       = {10.1117/12.2617153},
  language  = {English},
  keywords  = {Jaccard, dynamic time warping, k-means, similitude, supervised learning, unsupervised learning},
  note      = {Publisher Copyright: {\textcopyright} 2022 SPIE.; Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications IV 2022 ; Conference date: 06-06-2022 Through 12-06-2022},
  abstract  = {A problem of interest for nuclear nonproliferation is monitoring activities at nuclear facilities, where proliferation events may only take place a few times and often under variable conditions. Machine learning has revolutionized data analytics by enabling the use of measurable signatures to generate predictive models of facility operations. However, traditional methods for training these models require large, reliable data sets with labeled observations, a challenge for nonproliferation. Highly variable conditions further complicate this as events from training data may have occurred in conditions quite different from the event of interest. Our hypothesis is that when events occur in a highly variable environment, careful training data selection for each test event could outperform the standard approach of using all available training data. We developed a method to optimize training data selection for the given test event and applied it to predicting the power level of the High Flux Isotope Reactor (HFIR) at Oak Ridge National Laboratory. In this study, the reactor startup exhibits variability between occurrences due to natural variability in environmental conditions and operational procedures. Using a combination of analysis techniques, a similitude assessment was performed on data collected from HFIR to isolate clusters that were optimal for training a predictive model. Concepts such as dynamic time warping and Jaccard similarity were used in conjunction with clustering analysis. In order to validate this approach, the model was trained on every combination of unique training events and the predictive performance was compared to the performance using a subset of the training data selected by isolated clusters found through the similitude assessment.},
}