<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
<record>
  <controlfield tag="001">119865</controlfield>
  <controlfield tag="005">20230111103847.0</controlfield>
  <datafield tag="024" ind1="7" ind2=" ">
    <subfield code="2">doi</subfield>
    <subfield code="a">10.3233/SW-222899</subfield>
  </datafield>
  <datafield tag="024" ind1="8" ind2=" ">
    <subfield code="2">sideral</subfield>
    <subfield code="a">130783</subfield>
  </datafield>
  <datafield tag="037" ind1=" " ind2=" ">
    <subfield code="a">ART-2022-130783</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="a">Goel, S.</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Bilingual dictionary generation and enrichment via graph exploration</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2022</subfield>
  </datafield>
  <datafield tag="506" ind1="0" ind2=" ">
    <subfield code="a">Access copy available to the general public</subfield>
    <subfield code="f">Unrestricted</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
    <subfield code="a">In recent years, we have witnessed a steady growth of linguistic information represented and exposed as linked data on the Web. Such linguistic linked data have stimulated the development and use of openly available linguistic knowledge graphs, as is the case with the Apertium RDF, a collection of interconnected bilingual dictionaries represented and accessible through Semantic Web standards. In this work, we explore techniques that exploit the graph nature of bilingual dictionaries to automatically infer new links (translations). We build upon a cycle density based method: partitioning the graph into biconnected components for a speed-up, and simplifying the pipeline through a careful structural analysis that reduces hyperparameter tuning requirements. We also analyse the shortcomings of traditional evaluation metrics used for translation inference and propose to complement them with new ones, both-word precision (BWP) and both-word recall (BWR), aimed at being more informative of algorithmic improvements. Over twenty-seven language pairs, our algorithm produces dictionaries about 70% the size of existing Apertium RDF dictionaries at a high BWP of 85% from scratch within a minute. Human evaluation shows that 78% of the additional translations generated for dictionary enrichment are correct as well. We further describe an interesting use-case: inferring synonyms within a single language, on which our initial human-based evaluation shows an average accuracy of 84%. We release our tool as free/open-source software which can not only be applied to RDF data and Apertium dictionaries, but is also easily usable for other formats and communities.</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/grantAgreement/ES/AEI/PID2020-113903RB-I00</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/EC/H2020/825182/EU/Ready-to-use Multilingual Linked Language Data for Knowledge Services across Sectors/Pret-a-LLOD</subfield>
    <subfield code="9">This project has received funding from the European Union’s Horizon 2020 research and innovation program under grant agreement No H2020 825182-Pret-a-LLOD</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/ES/MINECO/RYC2019-028112-I</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/ES/MINECO/TIN2016-78011-C4-3-R</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/semantics/openAccess</subfield>
    <subfield code="a">by</subfield>
    <subfield code="u">http://creativecommons.org/licenses/by/3.0/es/</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="4">
    <subfield code="a">info:eu-repo/semantics/article</subfield>
    <subfield code="v">info:eu-repo/semantics/publishedVersion</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Gracia, J.</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0001-6452-7627</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Forcada, M. L.</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
    <subfield code="1">5007</subfield>
    <subfield code="2">570</subfield>
    <subfield code="a">Universidad de Zaragoza</subfield>
    <subfield code="b">Dpto. Informát.Ingenie.Sistms.</subfield>
    <subfield code="c">Área Lenguajes y Sistemas Inf.</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="g">13, 6 (2022), 1103-1132</subfield>
    <subfield code="t">Semantic Web</subfield>
    <subfield code="x">2210-4968</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">708303</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/119865/files/texto_completo.pdf</subfield>
    <subfield code="y">Versión publicada</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">1891562</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/119865/files/texto_completo.jpg?subformat=icon</subfield>
    <subfield code="x">icon</subfield>
    <subfield code="y">Versión publicada</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="o">oai:zaguan.unizar.es:119865</subfield>
    <subfield code="p">articulos</subfield>
    <subfield code="p">driver</subfield>
  </datafield>
  <datafield tag="951" ind1=" " ind2=" ">
    <subfield code="a">2023-01-11-10:11:13</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">ARTICLE</subfield>
  </datafield>
</record>
</collection>