% Conference paper entry; cite as vo2026hellinger.
% The url field points to the paper's OpenReview forum page.
% NOTE(review): tppubtype is not a standard BibTeX field and looks like
% metadata from the tool that exported this entry; it, keywords and pubstate
% are kept unchanged. Confirm before removing any of them.
% NOTE(review): the abstract contains non-ASCII characters; this is fine with
% Biber/UTF-8 but may need LaTeX escapes under classic 8-bit BibTeX.
@inproceedings{vo2026hellinger,
  author     = {Vo, Huyen Thuc Khanh and Valera, Isabel},
  title      = {{Hellinger} Multimodal Variational Autoencoders},
  booktitle  = {The 29th International Conference on Artificial Intelligence and Statistics},
  year       = {2026},
  date       = {2026-01-01},
  url        = {https://openreview.net/forum?id=mxHyYltMUa},
  urldate    = {2026-01-01},
  abstract   = {Multimodal variational autoencoders (VAEs) are widely used for weakly supervised generative learning with multiple modalities. Predominant methods aggregate unimodal inference distributions using either a product of experts (PoE), a mixture of experts (MoE), or their combinations to approximate the joint posterior. In this work, we revisit multimodal inference through the lens of probabilistic opinion pooling, an optimization-based approach. We start from Hölder pooling with α=0.5, which corresponds to the unique symmetric member of the α-divergence family, and derive a moment-matching approximation, termed Hellinger. We then leverage such an approximation to propose HELVAE, a multimodal VAE that avoids sub-sampling, yielding an efficient yet effective model that: (i) learns more expressive latent representations as additional modalities are observed; and (ii) empirically achieves better trade-offs between generative coherence and quality, outperforming state-of-the-art multimodal VAE models.},
  keywords   = {huyen, isabel, spotlight},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}