% Journal article on PolyDiffusion (Computers, Materials and Continua, vol. 85, no. 2, 2025).
% Author names are joined with " and " because BibTeX does not split a name list on commas.
% NOTE(review): the last sentence of the abstract has lost its repository link
% ("available at  (accessed on ...)"); restore it from the publisher page if known.
@article{cmc.2025.068500,
  author   = {Liu, Yuzhen and Yin, Jiasheng and Chen, Yixuan and Wang, Jin and Zhou, Xiaolan and Wang, Xiaoliang},
  title    = {{PolyDiffusion}: A Multi-Objective Optimized Contour-to-Image Diffusion Framework},
  journal  = {Computers, Materials \& Continua},
  year     = {2025},
  volume   = {85},
  number   = {2},
  pages    = {3965--3980},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.068500},
  url      = {http://www.techscience.com/cmc/v85n2/63844},
  abstract = {Multi-instance image generation remains a challenging task in the field of computer vision. While existing diffusion models demonstrate impressive fidelity in image generation, they often struggle with precisely controlling each object's shape, pose, and size. Methods like layout-to-image and mask-to-image provide spatial guidance but frequently suffer from object shape distortion, overlaps, and poor consistency, particularly in complex scenes with multiple objects. To address these issues, we introduce PolyDiffusion, a contour-based diffusion framework that encodes each object's contour as a boundary-coordinate sequence, decoupling object shapes and positions. This approach allows for better control over object geometry and spatial positioning, which is critical for achieving high-quality multi-instance generation. We formulate the training process as a multi-objective optimization problem, balancing three key objectives: a denoising diffusion loss to maintain overall image fidelity, a cross-attention contour alignment loss to ensure precise shape adherence, and a reward-guided denoising objective that minimizes the Fr{\'e}chet distance to real images. In addition, the Object Space-Aware Attention module fuses contour tokens with visual features, while a prior-guided fusion mechanism utilizes inter-object spatial relationships and class semantics to enhance consistency across multiple objects. Experimental results on benchmark datasets such as COCO-Stuff and VOC-2012 demonstrate that PolyDiffusion significantly outperforms existing layout-to-image and mask-to-image methods, achieving notable improvements in both image quality and instance-level segmentation accuracy. The implementation of PolyDiffusion is available at  (accessed on 06 August 2025).},
}