
@article{cmc.2025.073771,
  author   = {Yu, Jinzheng and Xu, Yang and Li, Haozhen and Li, Junqi and Zhu, Ligu and Shen, Hao and Shi, Lei},
  title    = {{OPOR-Bench}: Evaluating Large Language Models on Online Public Opinion Report Generation},
  journal  = {Computers, Materials \& Continua},
  volume   = {87},
  number   = {1},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.073771},
  url      = {http://www.techscience.com/cmc/v87n1/66073},
  abstract = {Online Public Opinion Reports consolidate news and social media for timely crisis management by governments and enterprises. While large language models (LLMs) enable automated report generation, this specific domain lacks formal task definitions and corresponding benchmarks. To bridge this gap, we define the Automated Online Public Opinion Report Generation (OPOR-Gen) task and construct OPOR-Bench, an event-centric dataset with 463 crisis events across 108 countries (comprising 8.8 K news articles and 185 K tweets). To evaluate report quality, we propose OPOR-Eval, a novel agent-based framework that simulates human expert evaluation. Validation experiments show OPOR-Eval achieves a high Spearman's correlation ({$\rho$} = 0.70) with human judgments, though challenges in temporal reasoning persist. This work establishes an initial foundation for advancing automated public opinion reporting research.},
}



