Source code for wlauto.workloads.spec2000

#    Copyright 2014-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


#pylint: disable=E1101,W0201
import operator
import os
import re
import string
import tarfile
from collections import defaultdict

from wlauto import Workload, Parameter, Alias
from wlauto.exceptions import ConfigError, WorkloadError
from wlauto.common.resources import ExtensionAsset
from wlauto.utils.misc import get_cpu_mask
from wlauto.utils.types import boolean, list_or_string


class Spec2000(Workload):

    name = 'spec2000'
    description = """
    SPEC2000 benchmarks measuring processor, memory and compiler performance.

    http://www.spec.org/cpu2000/

    From the web site:

    SPEC CPU2000 is the next-generation industry-standardized CPU-intensive benchmark
    suite. SPEC designed CPU2000 to provide a comparative measure of compute intensive
    performance across the widest practical range of hardware. The implementation
    resulted in source code benchmarks developed from real user applications. These
    benchmarks measure the performance of the processor, memory and compiler on the
    tested system.

    .. note:: At the moment, this workload relies on pre-built SPEC binaries (included
              in an asset bundle). These binaries *must* be built according to the rules
              outlined here::

                  http://www.spec.org/cpu2000/docs/runrules.html#toc_2.0

              in order for the results to be valid SPEC2000 results.

    .. note:: This workload does not attempt to generate results in an admissible SPEC
              format. No metadata is provided (though some, but not all, of the required
              metadata is collected by WA elsewhere). It is up to the user to post-process
              the results to generate a SPEC-admissible results file, if that is their
              intention.

    *base vs peak*

    SPEC2000 defines two build/test configurations: base and peak. Base is supposed to
    use a basic configuration (e.g. default compiler flags) with no tuning, and peak is
    specifically optimized for a system. Since this workload uses externally-built
    binaries, there is no way for WA to know which configuration was used -- the user
    is expected to keep track of that.

    Be aware that base/peak also come with specific requirements for the way workloads
    are run (e.g. how many instances on multi-core systems)::

        http://www.spec.org/cpu2000/docs/runrules.html#toc_3

    These are not enforced by WA, so it is again up to the user to ensure that correct
    workload parameters are specified in the agenda, if they intend to collect
    "official" SPEC results. (Those interested in collecting official SPEC results
    should also note that setting runtime parameters would violate SPEC run rules,
    which state that no configuration may be done to the platform after boot.)

    *bundle structure*

    This workload expects the actual benchmark binaries to be provided in a tarball
    "bundle" that has a very specific structure. At the top level of the tarball, there
    should be two directories: "fp" and "int" -- one for each of the SPEC2000
    categories. Under those, there is a sub-directory per benchmark. Each benchmark
    sub-directory contains three sub-sub-directories:

    - "cpus" contains a subdirectory for each supported cpu (e.g. a15) with a single
      executable binary for that cpu, in addition to a "generic" subdirectory containing
      a binary that has not been optimized for a specific cpu and should run on any ARM
      system.
    - "data" contains all additional files (input, configuration, etc.) that the
      benchmark executable relies on.
    - "scripts" contains one or more one-liner shell scripts that invoke the benchmark
      binary with the appropriate command line parameters. The name of the script must
      be in the format <benchmark name>[.<variant name>].sh, i.e. the name of the
      benchmark, optionally followed by a variant name, followed by the ".sh" extension.
      If there is more than one script, then all of them must have a variant; if there
      is only one script, then it should not contain a variant.
    A typical bundle may look like this::

        |- fp
        |  |-- ammp
        |  |   |-- cpus
        |  |   |   |-- generic
        |  |   |   |   |-- ammp
        |  |   |   |-- a15
        |  |   |   |   |-- ammp
        |  |   |   |-- a7
        |  |   |   |   |-- ammp
        |  |   |-- data
        |  |   |   |-- ammp.in
        |  |   |-- scripts
        |  |   |   |-- ammp.sh
        |  |-- applu
        .  .   .
        .  .   .
        .  .   .
        |- int
        .

    """

    # TODO: This is a bit of a hack. Need to re-think summary metric indication
    #       (also more than just summary/non-summary classification?)
    class _SPECSummaryMetrics(object):

        def __contains__(self, item):
            return item.endswith('_real')

    asset_file = 'spec2000-assets.tar.gz'

    aliases = [
        Alias('spec2k'),
    ]

    summary_metrics = _SPECSummaryMetrics()

    parameters = [
        Parameter('benchmarks', kind=list_or_string,
                  description='Specifies the SPEC benchmarks to run.'),
        Parameter('mode', kind=str, allowed_values=['speed', 'rate'], default='speed',
                  description='SPEC benchmarks can report either speed to execute or throughput/rate. '
                              'In the latter case, several "threads" will be spawned.'),
        Parameter('number_of_threads', kind=int, default=None,
                  description='Specify the number of "threads" to be used in \'rate\' mode. (Note: '
                              'on big.LITTLE systems this is the number of threads *per cluster*.)'),
        Parameter('force_extract_assets', kind=boolean, default=False,
                  description='If set to ``True``, will extract assets from the bundle, even if they are '
                              'already extracted. Note: this option implies ``force_push_assets``.'),
        Parameter('force_push_assets', kind=boolean, default=False,
                  description='If set to ``True``, assets will be pushed to the device even if they\'re '
                              'already present.'),
        Parameter('timeout', kind=int, default=20 * 60,
                  description='Timeout, in seconds, for the execution of a single spec test.'),
    ]

    speed_run_template = 'cd {datadir}; time ({launch_command})'
    rate_run_template = 'cd {datadir}; time ({loop}; wait)'
    loop_template = 'for i in $({busybox} seq 1 {threads}); do {launch_command} 1>/dev/null 2>&1 & done'
    launch_template = '{busybox} taskset {cpumask} {command} 1>/dev/null 2>&1'

    timing_regex = re.compile(r'(?P<minutes>\d+)m(?P<seconds>[\d.]+)s\s+(?P<category>\w+)')
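    # Illustration only (not part of the original workload): in 'speed' mode the
    # templates above nest into a single shell command per benchmark, roughly:
    #
    #   cd <datadir>; time (<busybox> taskset <cpumask> <command> 1>/dev/null 2>&1)
    #
    # where <command> is the benchmark's one-liner script with ${binary} replaced by
    # the installed binary path, and <cpumask> is derived from the CPU ids of the
    # chosen cluster. For a hypothetical device this might expand to something like
    # "cd /data/local/tmp/wa/spec2000/ammp; time (busybox taskset 0xf ./ammp.a15 ...)";
    # the paths and mask value here are assumptions for illustration only.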
    def init_resources(self, context):
        self._load_spec_benchmarks(context)
    def setup(self, context):
        cpus = self.device.core_names
        if not cpus:
            raise WorkloadError('Device has not specified CPU cores configuration.')
        cpumap = defaultdict(list)
        for i, cpu in enumerate(cpus):
            cpumap[cpu.lower()].append(i)
        for benchspec in self.benchmarks:
            commandspecs = self._verify_and_deploy_benchmark(benchspec, cpumap)
            self._build_command(benchspec, commandspecs)
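    # Illustration only: the cpumap built in setup() above groups CPU ids by core name,
    # e.g. for core_names == ['a7', 'a7', 'a15', 'a15'] it becomes
    # {'a7': [0, 1], 'a15': [2, 3]}, which is later used to derive per-cluster taskset
    # masks (the core names here are hypothetical examples).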
    def run(self, context):
        for name, command in self.commands:
            self.timings[name] = self.device.execute(command, timeout=self.timeout)
    def update_result(self, context):
        for benchmark, output in self.timings.iteritems():
            matches = self.timing_regex.finditer(output)
            found = False
            for match in matches:
                category = match.group('category')
                mins = float(match.group('minutes'))
                secs = float(match.group('seconds'))
                total = secs + 60 * mins
                context.result.add_metric('_'.join([benchmark, category]),
                                          total, 'seconds', lower_is_better=True)
                found = True
            if not found:
                self.logger.error('Could not get timings for {}'.format(benchmark))
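    # Illustration only: timing_regex expects a shell whose ``time`` builtin prints the
    # category after each value (as, for example, Android's mksh does), e.g. with
    # made-up numbers:
    #
    #   0m12.34s real     0m11.87s user     0m0.42s system
    #
    # which would produce metrics such as ``ammp_real = 12.34 seconds``. The ``*_real``
    # metrics are the ones _SPECSummaryMetrics treats as summary metrics.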
    def validate(self):
        if self.force_extract_assets:
            self.force_push_assets = True
        if self.benchmarks is None:  # pylint: disable=access-member-before-definition
            self.benchmarks = ['all']
        for benchname in self.benchmarks:
            if benchname == 'all':
                self.benchmarks = self.loaded_benchmarks.keys()
                break
            if benchname not in self.loaded_benchmarks:
                raise ConfigError('Unknown SPEC benchmark: {}'.format(benchname))
        if self.mode == 'speed':
            if self.number_of_threads is not None:
                raise ConfigError('number_of_threads cannot be specified in speed mode.')
        elif self.mode != 'rate':
            raise ValueError('Unexpected SPEC2000 mode: {}'.format(self.mode))  # Should never get here
        self.commands = []
        self.timings = {}

    def _load_spec_benchmarks(self, context):
        self.loaded_benchmarks = {}
        self.categories = set()
        if self.force_extract_assets or len(os.listdir(self.dependencies_directory)) < 2:
            bundle = context.resolver.get(ExtensionAsset(self, self.asset_file))
            with tarfile.open(bundle, 'r:gz') as tf:
                tf.extractall(self.dependencies_directory)
        for entry in os.listdir(self.dependencies_directory):
            entrypath = os.path.join(self.dependencies_directory, entry)
            if os.path.isdir(entrypath):
                for bench in os.listdir(entrypath):
                    self.categories.add(entry)
                    benchpath = os.path.join(entrypath, bench)
                    self._load_benchmark(benchpath, entry)

    def _load_benchmark(self, path, category):
        datafiles = []
        cpus = []
        for df in os.listdir(os.path.join(path, 'data')):
            datafiles.append(os.path.join(path, 'data', df))
        for cpu in os.listdir(os.path.join(path, 'cpus')):
            cpus.append(cpu)
        commandsdir = os.path.join(path, 'commands')
        for command in os.listdir(commandsdir):
            bench = SpecBenchmark()
            bench.name = os.path.splitext(command)[0]
            bench.path = path
            bench.category = category
            bench.datafiles = datafiles
            bench.cpus = cpus
            with open(os.path.join(commandsdir, command)) as fh:
                bench.command_template = string.Template(fh.read().strip())
            self.loaded_benchmarks[bench.name] = bench

    def _verify_and_deploy_benchmark(self, benchspec, cpumap):  # pylint: disable=R0914
        """Verifies that the supplied benchmark spec is valid and deploys the required
        assets to the device (if necessary). Returns a list of command specs (one for
        each CPU cluster) that can then be used to construct the final command."""
        bench = self.loaded_benchmarks[benchspec]
        basename = benchspec.split('.')[0]
        datadir = self.device.path.join(self.device.working_directory, self.name, basename)
        if self.force_push_assets or not self.device.file_exists(datadir):
            self.device.execute('mkdir -p {}'.format(datadir))
            for datafile in bench.datafiles:
                self.device.push_file(datafile,
                                      self.device.path.join(datadir, os.path.basename(datafile)))

        if self.mode == 'speed':
            cpus = [self._get_fastest_cpu().lower()]
        else:
            cpus = cpumap.keys()

        cmdspecs = []
        for cpu in cpus:
            try:
                host_bin_file = bench.get_binary(cpu)
            except ValueError, e:
                try:
                    msg = e.message
                    msg += ' Attempting to use generic binary instead.'
                    self.logger.debug(msg)
                    host_bin_file = bench.get_binary('generic')
                    cpu = 'generic'
                except ValueError, e:
                    raise ConfigError(e.message)  # re-raising as user error
            binname = os.path.basename(host_bin_file)
            binary = self.device.install(host_bin_file, with_name='.'.join([binname, cpu]))
            commandspec = CommandSpec()
            commandspec.command = bench.command_template.substitute({'binary': binary})
            commandspec.datadir = datadir
            if cpu == 'generic':
                all_cpus = reduce(operator.add, cpumap.values())
                commandspec.cpumask = get_cpu_mask(all_cpus)
                ncpus = len(all_cpus)
            else:
                commandspec.cpumask = get_cpu_mask(cpumap[cpu])
                ncpus = len(cpumap[cpu])
            # Assumed default: one "thread" per CPU covered by this spec when
            # number_of_threads is not set (only used in 'rate' mode).
            commandspec.threads = self.number_of_threads or ncpus
            cmdspecs.append(commandspec)
        return cmdspecs

    def _build_command(self, name, commandspecs):
        if self.mode == 'speed':
            if len(commandspecs) != 1:
                raise AssertionError('Must be exactly one command spec specified in speed mode.')
            spec = commandspecs[0]
            launch_command = self.launch_template.format(busybox=self.device.busybox,
                                                         command=spec.command,
                                                         cpumask=spec.cpumask)
            self.commands.append((name,
                                  self.speed_run_template.format(datadir=spec.datadir,
                                                                 launch_command=launch_command)))
        elif self.mode == 'rate':
            loops = []
            for spec in commandspecs:
                launch_command = self.launch_template.format(busybox=self.device.busybox,
                                                             command=spec.command,
                                                             cpumask=spec.cpumask)
                loops.append(self.loop_template.format(busybox=self.device.busybox,
                                                       launch_command=launch_command,
                                                       threads=spec.threads))
            self.commands.append((name,
                                  self.rate_run_template.format(datadir=spec.datadir,
                                                                loop='; '.join(loops))))
        else:
            raise ValueError('Unexpected SPEC2000 mode: {}'.format(self.mode))  # Should never get here

    def _get_fastest_cpu(self):
        cpu_types = set(self.device.core_names)
        if len(cpu_types) == 1:
            return cpu_types.pop()
        fastest_cpu = None
        fastest_freq = 0
        for cpu_type in cpu_types:
            try:
                idx = self.device.get_core_online_cpu(cpu_type)
                freq = self.device.get_cpu_max_frequency(idx)
                if freq > fastest_freq:
                    fastest_freq = freq
                    fastest_cpu = cpu_type
            except ValueError:
                pass
        if not fastest_cpu:
            raise WorkloadError('No active CPUs found on device. Something is very wrong...')
        return fastest_cpu
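# Illustration only (not part of the original workload): in 'rate' mode one background
# loop is generated per cluster and joined into a single command. On a hypothetical
# big.LITTLE device with 4xA7 (cpus 0-3) and 4xA15 (cpus 4-7), and no explicit
# number_of_threads, the assembled command would look roughly like:
#
#   cd <datadir>; time (
#       for i in $(busybox seq 1 4); do busybox taskset 0x0f <command> 1>/dev/null 2>&1 & done;
#       for i in $(busybox seq 1 4); do busybox taskset 0xf0 <command> 1>/dev/null 2>&1 & done;
#       wait)
#
# <datadir> and <command> are placeholders; the masks and thread counts depend on the
# device's CPU numbering and the number_of_threads parameter.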
class SpecBenchmark(object):

    def __init__(self):
        self.name = None
        self.path = None
        self.category = None
        self.command_template = None
        self.cpus = []
        self.datafiles = []
    def get_binary(self, cpu):
        if cpu not in self.cpus:
            raise ValueError('CPU {} is not supported by {}.'.format(cpu, self.name))
        binpath = os.path.join(self.path, 'cpus', cpu, self.name.split('.')[0])
        if not os.path.isfile(binpath):
            raise ValueError('CPU {} is not supported by {}.'.format(cpu, self.name))
        return binpath
class CommandSpec(object):

    def __init__(self):
        self.cpumask = None
        self.datadir = None
        self.command = None
        self.threads = None
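# Illustration only: a minimal agenda entry exercising this workload might look like the
# sketch below (assuming standard WA agenda conventions; the benchmark names must match
# entries in the deployed bundle -- 'ammp' and 'applu' are just the examples used in the
# bundle layout shown in the docstring above):
#
#   workloads:
#     - name: spec2000
#       workload_parameters:
#         benchmarks: [ammp, applu]
#         mode: rate
#         number_of_threads: 4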