Good benchmarks are hard to find because they require a substantial effort to keep them representative of the constantly changing challenges of a particular field. Synthetic benchmarks are a common approach to dealing with this, and methods from machine learning are natural candidates for synthetic benchmark generation. In this paper we investigate the usefulness of machine learning in the prominent CLgen benchmark generator. We re-evaluate CLgen by comparing the benchmarks generated by the model with the raw data used to train it. This re-evaluation indicates that, for the use case considered, machine learning did not yield additional benefit over a simpler method using the raw data. We investigate the reasons for this and provide further insights into the challenges the problem could pose for potential future generators.
%0 Conference Paper
%1 Goens:19:MLSB
%A Goens, Andrés
%A Brauckmann, Alexander
%A Ertel, Sebastian
%A Cummins, Chris
%A Leather, Hugh
%A Castrillon, Jeronimo
%B Proceedings of the 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages
%C New York, NY, USA
%D 2019
%I Association for Computing Machinery
%K Benchmarking, CLGen, Generative models, Machine Learning, Synthetic program generation
%P 38–46
%R 10.1145/3315508.3329976
%T A Case Study on Machine Learning for Synthesizing Benchmarks
%X Good benchmarks are hard to find because they require a substantial effort to keep them representative of the constantly changing challenges of a particular field. Synthetic benchmarks are a common approach to dealing with this, and methods from machine learning are natural candidates for synthetic benchmark generation. In this paper we investigate the usefulness of machine learning in the prominent CLgen benchmark generator. We re-evaluate CLgen by comparing the benchmarks generated by the model with the raw data used to train it. This re-evaluation indicates that, for the use case considered, machine learning did not yield additional benefit over a simpler method using the raw data. We investigate the reasons for this and provide further insights into the challenges the problem could pose for potential future generators.
%@ 9781450367196
@inproceedings{Goens:19:MLSB,
abstract = {Good benchmarks are hard to find because they require a substantial effort to keep them representative of the constantly changing challenges of a particular field. Synthetic benchmarks are a common approach to dealing with this, and methods from machine learning are natural candidates for synthetic benchmark generation. In this paper we investigate the usefulness of machine learning in the prominent CLgen benchmark generator. We re-evaluate CLgen by comparing the benchmarks generated by the model with the raw data used to train it. This re-evaluation indicates that, for the use case considered, machine learning did not yield additional benefit over a simpler method using the raw data. We investigate the reasons for this and provide further insights into the challenges the problem could pose for potential future generators.},
address = {New York, NY, USA},
author = {Goens, Andr\'{e}s and Brauckmann, Alexander and Ertel, Sebastian and Cummins, Chris and Leather, Hugh and Castrillon, Jeronimo},
booktitle = {Proceedings of the 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages},
doi = {10.1145/3315508.3329976},
isbn = {9781450367196},
keywords = {Benchmarking, CLGen, Generative models, Machine Learning, Synthetic program generation},
location = {Phoenix, AZ, USA},
numpages = {9},
pages = {38--46},
publisher = {Association for Computing Machinery},
series = {MAPL 2019},
title = {{A Case Study on Machine Learning for Synthesizing Benchmarks}},
year = 2019
}