Cylon uses OpenMPI underneath to be fast and scalable. It provides core data processing operators that are many times more efficient than those of current systems. Cylon is designed to work across different data processing frameworks, deep learning frameworks, and data formats.

Selected Publications

More references can be found on the Publications page.

    1. Sarker, A. K., Alsaadi, A., Halpern, A. J., Tangella, P., Titov, M., Perera, N., Staylor, M., von Laszewski, G., Jha, S., & Fox, G. (2026). Deep RC: A Scalable Data Engineering and Deep Learning Pipeline. In D. Klusáček, J. Corbalán, & G. P. Rodrigo (Eds.), Job Scheduling Strategies for Parallel Processing (Vol. 14594, pp. 205–223). Springer Nature Switzerland. https://doi.org/10.1007/978-3-032-10507-3_12
      [DOI] [URL]
      [BibTeX]
      @inproceedings{las2026deeprc,
        author    = {Sarker, Arup Kumar and Alsaadi, Aymen and Halpern, Alexander James and Tangella, Prabhath and Titov, Mikhail and Perera, Niranda and Staylor, Mills and von Laszewski, Gregor and Jha, Shantenu and Fox, Geoffrey},
        editor    = {Klus{\'a}{\v{c}}ek, Dalibor and Corbal{\'a}n, Julita and Rodrigo, Gonzalo P.},
        title     = {Deep {RC}: A Scalable Data Engineering and Deep Learning Pipeline},
        booktitle = {Job Scheduling Strategies for Parallel Processing},
        series    = {Lecture Notes in Computer Science},
        volume    = {14594},
        pages     = {205--223},
        publisher = {Springer Nature Switzerland},
        address   = {Cham},
        year      = {2026},
        isbn      = {978-3-032-10507-3},
        doi       = {10.1007/978-3-032-10507-3_12},
        url       = {https://arxiv.org/abs/2502.20724},
        keywords  = {cylon}
      }
      
    2. Staylor, M., Sarker, A. K., von Laszewski, G., Fox, G., Cheng, Y., & Fox, J. (2025). Combining Serverless and High-Performance Computing Paradigms to support ML Data-Intensive Applications. https://arxiv.org/abs/2511.12185
      [URL]
      [BibTeX]
      @misc{las-2025-serverless,
        author        = {Staylor, Mills and Sarker, Arup Kumar and von Laszewski, Gregor and Fox, Geoffrey and Cheng, Yue and Fox, Judy},
        title         = {Combining Serverless and High-Performance Computing Paradigms to support {ML} Data-Intensive Applications},
        year          = {2025},
        eprint        = {2511.12185},
        archiveprefix = {arXiv},
        primaryclass  = {cs.DC},
        url           = {https://arxiv.org/abs/2511.12185},
        keywords      = {cylon}
      }
      
    3. Sarker, A. K., Alsaadi, A., Perera, N., Staylor, M., von Laszewski, G., Turilli, M., Kilic, O. O., Titov, M., Merzky, A., Jha, S., & Fox, G. (2025). Radical-Cylon: A Heterogeneous Data Pipeline for Scientific Computing. In D. Klusáček, J. Corbalán, & G. P. Rodrigo (Eds.), Job Scheduling Strategies for Parallel Processing (pp. 84–102). Springer Nature Switzerland.
      [BibTeX]
      @inproceedings{las2025radicalcylon,
        author    = {Sarker, Arup Kumar and Alsaadi, Aymen and Perera, Niranda and Staylor, Mills and von Laszewski, Gregor and Turilli, Matteo and Kilic, Ozgur Ozan and Titov, Mikhail and Merzky, Andre and Jha, Shantenu and Fox, Geoffrey},
        editor    = {Klus{\'a}{\v{c}}ek, Dalibor and Corbal{\'a}n, Julita and Rodrigo, Gonzalo P.},
        title     = {{Radical-Cylon}: A Heterogeneous Data Pipeline for Scientific Computing},
        booktitle = {Job Scheduling Strategies for Parallel Processing},
        series    = {Lecture Notes in Computer Science},
        pages     = {84--102},
        publisher = {Springer Nature Switzerland},
        address   = {Cham},
        year      = {2025},
        isbn      = {978-3-031-74430-3},
        keywords  = {cylon}
      }
      
    4. Perera, N., Sarker, A. K., Shan, K., Fetea, A., Kamburugamuve, S., Kanewala, T. A., Widanage, C., Staylor, M., Zhong, T., Abeykoon, V., von Laszewski, G., & Fox, G. (2024). Supercharging distributed computing environments for high-performance data engineering. Frontiers in High Performance Computing, 2. https://doi.org/10.3389/fhpcp.2024.1384619
      [DOI] [URL]
      [BibTeX]
      @article{las2024supercharge,
        author   = {Perera, Niranda and Sarker, Arup Kumar and Shan, Kaiying and Fetea, Alex and Kamburugamuve, Supun and Kanewala, Thejaka Amila and Widanage, Chathura and Staylor, Mills and Zhong, Tianle and Abeykoon, Vibhatha and von Laszewski, Gregor and Fox, Geoffrey},
        title    = {Supercharging distributed computing environments for high-performance data engineering},
        journal  = {Frontiers in High Performance Computing},
        volume   = {2},
        year     = {2024},
        issn     = {2813-7337},
        doi      = {10.3389/fhpcp.2024.1384619},
        url      = {https://www.frontiersin.org/journals/high-performance-computing/articles/10.3389/fhpcp.2024.1384619},
        keywords = {cylon}
      }
      
    5. Sarker, A. K., Alsaadi, A., Perera, N., Staylor, M., von Laszewski, G., Turilli, M., Kilic, O. O., Titov, M., Merzky, A., Jha, S., & Fox, G. (2024). Design and Implementation of an Analysis Pipeline for Heterogeneous Data. https://arxiv.org/abs/2403.15721
      [URL]
      [BibTeX]
      @misc{las-2024-analysispipleine,
        author        = {Sarker, Arup Kumar and Alsaadi, Aymen and Perera, Niranda and Staylor, Mills and von Laszewski, Gregor and Turilli, Matteo and Kilic, Ozgur Ozan and Titov, Mikhail and Merzky, Andre and Jha, Shantenu and Fox, Geoffrey},
        title         = {Design and Implementation of an Analysis Pipeline for Heterogeneous Data},
        year          = {2024},
        eprint        = {2403.15721},
        archiveprefix = {arXiv},
        primaryclass  = {cs.DC},
        url           = {https://arxiv.org/abs/2403.15721},
        keywords      = {cylon}
      }
      
    6. Sarker, A. K., Perera, N., von Laszewski, G., Staylor, M., & Fox, G. C. (2023, October). Heterogeneous DataPipelines for Scientific Computing. 2023 OLCF User Meeting. https://github.com/cyberaide/poster-summit-cylon/blob/main/vonLaszewski-heterogeneous-data-pipeline-2.pdf
      [URL]
      [BibTeX]
      @inproceedings{las-2023-poster-cylon-ornl,
        author    = {Sarker, Arup Kumar and Perera, Niranda and von Laszewski, Gregor and Staylor, Mills and Fox, Geoffrey C.},
        title     = {Heterogeneous DataPipelines for Scientific Computing},
        booktitle = {2023 OLCF User Meeting},
        address   = {Oak Ridge National Laboratory, Knoxville, TN},
        month     = oct,
        year      = {2023},
        note      = {Poster},
        url       = {https://github.com/cyberaide/poster-summit-cylon/blob/main/vonLaszewski-heterogeneous-data-pipeline-2.pdf},
        keywords  = {cylon}
      }
      
    7. Abeykoon, V., Kamburugamuve, S., Widanage, C., Perera, N., Uyar, A., Kanewala, T. A., von Laszewski, G., & Fox, G. (2022). HPTMT Parallel Operators for High Performance Data Science and Data Engineering. Frontiers in Big Data, 4. https://doi.org/10.3389/fdata.2021.756041
      [DOI] [URL]
      [BibTeX]
      @article{las-2022-hptmt-frontiers,
        author   = {Abeykoon, Vibhatha and Kamburugamuve, Supun and Widanage, Chathura and Perera, Niranda and Uyar, Ahmet and Kanewala, Thejaka Amila and von Laszewski, Gregor and Fox, Geoffrey},
        title    = {{HPTMT} Parallel Operators for High Performance Data Science and Data Engineering},
        journal  = {Frontiers in Big Data},
        volume   = {4},
        year     = {2022},
        issn     = {2624-909X},
        doi      = {10.3389/fdata.2021.756041},
        url      = {https://www.frontiersin.org/articles/10.3389/fdata.2021.756041},
        keywords = {cylon}
      }