<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
<record>
  <controlfield tag="001">121235</controlfield>
  <controlfield tag="005">20260112133149.0</controlfield>
  <datafield tag="024" ind1="7" ind2=" ">
    <subfield code="2">doi</subfield>
    <subfield code="a">10.1109/TASLP.2022.3224282</subfield>
  </datafield>
  <datafield tag="024" ind1="8" ind2=" ">
    <subfield code="2">sideral</subfield>
    <subfield code="a">131864</subfield>
  </datafield>
  <datafield tag="037" ind1=" " ind2=" ">
    <subfield code="a">ART-2023-131864</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="a">Diaz-Guerra, David</subfield>
    <subfield code="0">(orcid)0000-0002-1041-0498</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Direction of arrival estimation of sound sources using icosahedral CNNs</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2023</subfield>
  </datafield>
  <datafield tag="506" ind1="0" ind2=" ">
    <subfield code="a">Access copy available to the general public</subfield>
    <subfield code="f">Unrestricted</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
    <subfield code="a">In this paper, we present a new model for Direction of Arrival (DOA) estimation of sound sources based on an Icosahedral Convolutional Neural Network (CNN) applied over SRP-PHAT power maps computed from the signals received by a microphone array. This icosahedral CNN is equivariant to the 60 rotational symmetries of the icosahedron, which represent a good approximation of the continuous space of spherical rotations, and can be implemented using standard 2D convolutional layers, having a lower computational cost than most of the spherical CNNs. In addition, instead of using fully connected layers after the icosahedral convolutions, we propose a new soft-argmax function that can be seen as a differentiable version of the argmax function and allows us to solve the DOA estimation as a regression problem interpreting the output of the convolutional layers as a probability distribution. We prove that using models that fit the equivariances of the problem allows us to outperform other state-of-the-art models with a lower computational cost and more robustness, obtaining root mean square localization errors lower than 10° even in scenarios with a reverberation time T60 of 1.5 s.</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/grantAgreement/ES/DGA-FEDER/2014-2020</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/semantics/closedAccess</subfield>
    <subfield code="a">All rights reserved</subfield>
    <subfield code="u">http://www.europeana.eu/rights/rr-f/</subfield>
  </datafield>
  <datafield tag="590" ind1=" " ind2=" ">
    <subfield code="a">4.1</subfield>
    <subfield code="b">2023</subfield>
  </datafield>
  <datafield tag="591" ind1=" " ind2=" ">
    <subfield code="a">ACOUSTICS</subfield>
    <subfield code="b">4 / 40 = 0.1</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
    <subfield code="e">T1</subfield>
  </datafield>
  <datafield tag="591" ind1=" " ind2=" ">
    <subfield code="a">ENGINEERING, ELECTRICAL &amp; ELECTRONIC</subfield>
    <subfield code="b">94 / 353 = 0.266</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q2</subfield>
    <subfield code="e">T1</subfield>
  </datafield>
  <datafield tag="592" ind1=" " ind2=" ">
    <subfield code="a">1.542</subfield>
    <subfield code="b">2023</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Acoustics and Ultrasonics</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Computational Mathematics</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Computer Science (miscellaneous)</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Speech and Hearing</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Instrumentation</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Media Technology</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Signal Processing</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Electrical and Electronic Engineering</subfield>
    <subfield code="c">2023</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="594" ind1=" " ind2=" ">
    <subfield code="a">11.3</subfield>
    <subfield code="b">2023</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="4">
    <subfield code="a">info:eu-repo/semantics/article</subfield>
    <subfield code="v">info:eu-repo/semantics/acceptedVersion</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Miguel, Antonio</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0001-5803-4316</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Beltran, Jose R.</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0002-7500-4650</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
    <subfield code="1">5008</subfield>
    <subfield code="2">785</subfield>
    <subfield code="a">Universidad de Zaragoza</subfield>
    <subfield code="b">Dpto. Ingeniería Electrón.Com.</subfield>
    <subfield code="c">Área Tecnología Electrónica</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
    <subfield code="1">5008</subfield>
    <subfield code="2">800</subfield>
    <subfield code="a">Universidad de Zaragoza</subfield>
    <subfield code="b">Dpto. Ingeniería Electrón.Com.</subfield>
    <subfield code="c">Área Teoría Señal y Comunicac.</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="g">31 (2023), 313-321</subfield>
    <subfield code="p">IEEE/ACM trans. audio speech lang. process.</subfield>
    <subfield code="t">IEEE/ACM Transactions on Audio, Speech, and Language Processing</subfield>
    <subfield code="x">2329-9290</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1944152</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/121235/files/texto_completo.pdf</subfield>
    <subfield code="y">Postprint</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">3423313</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/121235/files/texto_completo.jpg?subformat=icon</subfield>
    <subfield code="x">icon</subfield>
    <subfield code="y">Postprint</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="o">oai:zaguan.unizar.es:121235</subfield>
    <subfield code="p">articulos</subfield>
    <subfield code="p">driver</subfield>
  </datafield>
  <datafield tag="951" ind1=" " ind2=" ">
    <subfield code="a">2026-01-12-12:37:14</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">ARTICLE</subfield>
  </datafield>
</record>
</collection>