<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
<record>
  <controlfield tag="001">165093</controlfield>
  <controlfield tag="005">20251212165957.0</controlfield>
  <datafield tag="024" ind1="7" ind2=" ">
    <subfield code="2">doi</subfield>
    <subfield code="a">10.1016/j.fsidi.2025.301930</subfield>
  </datafield>
  <datafield tag="024" ind1="8" ind2=" ">
    <subfield code="2">sideral</subfield>
    <subfield code="a">146651</subfield>
  </datafield>
  <datafield tag="037" ind1=" " ind2=" ">
    <subfield code="a">ART-2025-146651</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="a">Huici, Daniel</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">An extensible and scalable system for hash lookup and approximate similarity search with similarity digest algorithms</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2025</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
    <subfield code="a">Efficient management and analysis of large volumes of digital data has emerged as a major challenge in the field of digital forensics. To quickly identify and analyze relevant artifacts within large datasets, we introduce APOTHEOSIS, an approximate similarity search system designed for scalability and efficiency. Our system integrates approximate search techniques (which allow searching for a match on a close value) with Similarity Digest Algorithms (SDA; which capture common features between similar elements), using a space-saving radix tree and a graph-based hierarchical navigable small world structure to perform fast approximate nearest neighbor searches. We demonstrate the effectiveness and versatility of our system through two key case studies: first, in plagiarism detection, demonstrating the effectiveness of our system in identifying similar or duplicate documents within a large source code dataset; then, in memory artifact detection, showing its scalability and performance in processing large-scale forensic data collected from various versions of Microsoft Windows. Our comprehensive evaluation shows that APOTHEOSIS not only efficiently handles large datasets, but also provides a way to evaluate the performance of various SDA and their approximate similarity search in different forensic scenarios.</subfield>
  </datafield>
  <datafield tag="506" ind1="0" ind2=" ">
    <subfield code="a">Access copy available to the general public</subfield>
    <subfield code="f">Unrestricted</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/grantAgreement/ES/AEI/PID2020-113903RB-I00</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/ES/DGA/T21-23R</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/ES/DGA/T42-23R</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/ES/MCIU/PID2023-151467OA-I00</subfield>
    <subfield code="9">info:eu-repo/grantAgreement/EUR/MICINN/TED2021-131115A-I00</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/semantics/openAccess</subfield>
    <subfield code="a">by-nc-nd</subfield>
    <subfield code="u">https://creativecommons.org/licenses/by-nc-nd/4.0/deed.es</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="4">
    <subfield code="a">info:eu-repo/semantics/article</subfield>
    <subfield code="v">info:eu-repo/semantics/publishedVersion</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Rodríguez, Ricardo J.</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0001-7982-0359</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Mena, Eduardo</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0002-7462-0080</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
    <subfield code="1">5007</subfield>
    <subfield code="2">570</subfield>
    <subfield code="a">Universidad de Zaragoza</subfield>
    <subfield code="b">Dpto. Informát.Ingenie.Sistms.</subfield>
    <subfield code="c">Área Lenguajes y Sistemas Inf.</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="g">53 (2025), 301930 [9 pp.]</subfield>
    <subfield code="p">Forensic sci. int. digital invest.</subfield>
    <subfield code="t">Forensic science international. Digital investigation</subfield>
    <subfield code="x">2666-2825</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">3594962</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/165093/files/texto_completo.pdf</subfield>
    <subfield code="y">Versión publicada</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">2658292</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/165093/files/texto_completo.jpg?subformat=icon</subfield>
    <subfield code="x">icon</subfield>
    <subfield code="y">Versión publicada</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="o">oai:zaguan.unizar.es:165093</subfield>
    <subfield code="p">articulos</subfield>
    <subfield code="p">driver</subfield>
  </datafield>
  <datafield tag="951" ind1=" " ind2=" ">
    <subfield code="a">2025-12-12-14:42:32</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">ARTICLE</subfield>
  </datafield>
</record>
</collection>