<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
<record>
  <controlfield tag="001">112181</controlfield>
  <controlfield tag="005">20240319080957.0</controlfield>
  <datafield tag="024" ind1="7" ind2=" ">
    <subfield code="2">doi</subfield>
    <subfield code="a">10.1016/j.sysarc.2022.102398</subfield>
  </datafield>
  <datafield tag="024" ind1="8" ind2=" ">
    <subfield code="2">sideral</subfield>
    <subfield code="a">128413</subfield>
  </datafield>
  <datafield tag="037" ind1=" " ind2=" ">
    <subfield code="a">ART-2022-128413</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="a">Rodríguez, A.</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Lightweight asynchronous scheduling in heterogeneous reconfigurable systems</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2022</subfield>
  </datafield>
  <datafield tag="506" ind1="0" ind2=" ">
    <subfield code="a">Access copy available to the general public</subfield>
    <subfield code="f">Unrestricted</subfield>
  </datafield>
  <datafield tag="520" ind1="3" ind2=" ">
    <subfield code="a">The trend for heterogeneous embedded systems is the integration of accelerators and general-purpose CPU cores on the same die. In these integrated architectures, like the Zynq UltraScale+ board (CPU+FPGA) that we target in this work, hardware support for shared memory and low-overhead synchronization between the accelerator and the CPU cores make the case for exploring strategies that exploit a tight collaboration between the CPUs and the accelerator. In this paper we propose a novel lightweight scheduling strategy, FastFit, targeted to FPGA accelerators, and a new scheduler based on it, named MultiFastFit, which asynchronously tackles heterogeneous systems comprised of a variety of CPU cores and FPGA IPs. Our strategy significantly reduces the overhead to automatically compute the near-optimal chunksizes when compared to a previous state-of-the-art auto-tuned approach, which makes our approach more suitable for fine-grained applications. Additionally, our scheduler MultiFastFit has been designed to enable the efficient co-execution of work among compute devices in such a way that all the devices are busy while minimizing the load unbalance. Our approaches have been evaluated using four benchmarks carefully tuned for the low-power UltraScale+ platform. Our experiments demonstrate that the FastFit strategy always finds the near-optimal FPGA chunksize for any device configuration at a reasonable cost, even for fine-grained and irregular applications, and that heterogeneous CPU+FPGA co-executions that exploit all the compute devices are usually faster and more energy efficient than the CPU-only and FPGA-only executions. We have also compared MultiFastFit with other state-of-the-art scheduling strategies, finding that it outperforms other auto-tuned approach up to 2x and it achieves similar results to manually-tuned schedulers without requiring an offline search of the ideal CPU-FPGA partition or FPGA chunk granularity. © 2022 The Authors</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/grantAgreement/ES/MICINN/PID2019-105396RB-I00</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="9">info:eu-repo/semantics/openAccess</subfield>
    <subfield code="a">by</subfield>
    <subfield code="u">http://creativecommons.org/licenses/by/3.0/es/</subfield>
  </datafield>
  <datafield tag="590" ind1=" " ind2=" ">
    <subfield code="a">4.5</subfield>
    <subfield code="b">2022</subfield>
  </datafield>
  <datafield tag="591" ind1=" " ind2=" ">
    <subfield code="a">COMPUTER SCIENCE, SOFTWARE ENGINEERING</subfield>
    <subfield code="b">22 / 108 = 0.204</subfield>
    <subfield code="c">2022</subfield>
    <subfield code="d">Q1</subfield>
    <subfield code="e">T1</subfield>
  </datafield>
  <datafield tag="591" ind1=" " ind2=" ">
    <subfield code="a">COMPUTER SCIENCE, HARDWARE &amp; ARCHITECTURE</subfield>
    <subfield code="b">11 / 54 = 0.204</subfield>
    <subfield code="c">2022</subfield>
    <subfield code="d">Q1</subfield>
    <subfield code="e">T1</subfield>
  </datafield>
  <datafield tag="592" ind1=" " ind2=" ">
    <subfield code="a">1.276</subfield>
    <subfield code="b">2022</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Software</subfield>
    <subfield code="c">2022</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="593" ind1=" " ind2=" ">
    <subfield code="a">Hardware and Architecture</subfield>
    <subfield code="c">2022</subfield>
    <subfield code="d">Q1</subfield>
  </datafield>
  <datafield tag="594" ind1=" " ind2=" ">
    <subfield code="a">8.5</subfield>
    <subfield code="b">2022</subfield>
  </datafield>
  <datafield tag="655" ind1=" " ind2="4">
    <subfield code="a">info:eu-repo/semantics/article</subfield>
    <subfield code="v">info:eu-repo/semantics/publishedVersion</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Navarro, A.</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Nikov, K.</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Nunez-Yanez, J.</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Gran Tejero, R.</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0002-4031-5651</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Suárez Gracia, D.</subfield>
    <subfield code="u">Universidad de Zaragoza</subfield>
    <subfield code="0">(orcid)0000-0002-7490-4067</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Asenjo, R.</subfield>
  </datafield>
  <datafield tag="710" ind1="2" ind2=" ">
    <subfield code="1">5007</subfield>
    <subfield code="2">035</subfield>
    <subfield code="a">Universidad de Zaragoza</subfield>
    <subfield code="b">Dpto. Informát.Ingenie.Sistms.</subfield>
    <subfield code="c">Área Arquit.Tecnología Comput.</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="g">124 (2022), 102398 [14 pp]</subfield>
    <subfield code="p">J. systems archit.</subfield>
    <subfield code="t">Journal of Systems Architecture</subfield>
    <subfield code="x">1383-7621</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">2349544</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/112181/files/texto_completo.pdf</subfield>
    <subfield code="y">Versión publicada</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">2577679</subfield>
    <subfield code="u">http://zaguan.unizar.es/record/112181/files/texto_completo.jpg?subformat=icon</subfield>
    <subfield code="x">icon</subfield>
    <subfield code="y">Versión publicada</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="o">oai:zaguan.unizar.es:112181</subfield>
    <subfield code="p">articulos</subfield>
    <subfield code="p">driver</subfield>
  </datafield>
  <datafield tag="951" ind1=" " ind2=" ">
    <subfield code="a">2024-03-18-13:43:25</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">ARTICLE</subfield>
  </datafield>
</record>
</collection>