% Conference paper (BSEC 2013). The full proceedings title, including its
% subtitle and acronym, lives in booktitle; no separate series field is used
% because the proceedings are not part of a named publication series here.
% The note carries only the conference dates.
@inproceedings{afa71782e0b04ae6b51a61a447133435,
  author    = {Yoon, {Hong Jun} and Xu, Songhua and Tourassi, Georgia},
  title     = {A cost-effective, case-control study on the association between breast cancer and pregnancy through web mining},
  booktitle = {Proceedings of the 2013 4th Annual {ORNL} Biomedical Sciences and Engineering Conference: Collaborative Biomedical Innovations, {BSEC} 2013},
  year      = {2013},
  doi       = {10.1109/BSEC.2013.6618493},
  isbn      = {9781479921188},
  language  = {English},
  keywords  = {breast cancer, case-control study, epidemiology, obituary, web mining},
  abstract  = {We report a case-control epidemiological study through mining people's stories from the Internet. Our overarching goal is to test whether mining openly available, personal stories from the Internet is a cost-effective way for reliable epidemiological discoveries. As a case study, we focus on the association between breast cancer risk and pregnancy, which is clearly established through controlled clinical survey studies. Specifically, we automatically collected and mined 30,000 online obituary articles via a series of tailored cyber-informatics tools we developed. Replicating a case-control study design, we analyzed the collected data confirming with statistical significance that parity is associated with lower breast cancer risk. Our web mining study demonstrates promising preliminary evidence that online content mining can be a cost-effective and reliable way for epidemiological knowledge discovery.},
  note      = {Conference held 21--23 May 2013},
}