sar -> sadf の出力を Excel 等で扱える TSV に変換するシェルスクリプトを書いた

sar の出力は sadf コマンドで TSV にできるのですが、これは時間×メトリクスごとに行が出力されるので、そのまま Excel に取り込んでグラフ描いたりできません。

ということで、 sadf の出力を Excel に取り込みやすい行列型 TSV に変換するシェルスクリプトを書きました。 bashawkGNU date があれば動くはずです。 *1

こんな風に「-p <正規表現>」で出力するラベル(<デバイス>:<メトリクス>)が絞り込めます。指定しなければ全部出します。

$ sadf -- -A sar.out | sadf2matrix -p '(all|cpu.*):%.*'
time                  all:%%usr   all:%%nice  all:%%sys  ...
2014-06-22 17:32:11   10.13       0.2         2.3        ...
2014-06-22 17:32:16   9.45        0.6         3.4        ...
2014-06-22 17:32:21   10.30       0.5         2.6        ...
...

出力の改行コードはデフォルトで CR+LF です。それもこれも Excel のためです。

ラベルの一覧は「-l」で出せます。

$ sadf -- -A sar.out | sadf2matrix -l -p '(all|cpu.*):%.*'
all:%%usr
all:%%nice
all:%%sys
...

Bitbucket の miyakawataku/sadf2matrix に置きました。現時点のバージョンを以下に貼ります。

#!/usr/bin/env bash
# vim: et sw=2 sts=2

# Copyright (c) 2014 Miyakawa Taku
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# Resets IFS
IFS=$' \t\n'

# Resets aliases and functions
unset -f unalias
\unalias -a
unset -f command

# Usage of the command
usage() {
  cat <<EOS
USAGE: $0 [Options]... [Sadf-output-files]...

Converts output of sadf command to matrix-style TSV,
which can easily be treated by tools such as Microsoft Excel.

Options:

  -p <column-label-pattern>
    Regex pattern of labels of columns to be output (ex. -p 'all:%.*').
    The regex syntax is same as Awk's.
    The default is '.*' (outputs all the metrics).

  -e
    Formats column labels in the extended format <host>:<device>:<metric>,
    instead of the default format <device>:<metric>.

  -t <time-format>
    Format of times in strftime(3) syntax (ex. -t %s).
    The default is '%Y-%m-%dT%H:%M:%S'.

  -r <row-separator>
    Separator of rows (ex. -r $'\\n').
    The default is CR+LF ($'\\r\\n').

  -l
    Lists column labels.

  -A <awk-command-path>
    Path to the awk command.

  -D <date-command-path>
    Path to the GNU date command.

  -h
    Prints out this usage text.

Sadf output file:

  This command takes zero or more sadf output files after options.
  When no file is specified, this command reads the file
  from the standard input.

Example usage:

  1. Outputs CPU metrics.

    $ sadf -- -A sar.out | $0 -p '(all|cpu.*):%.*'
    time                  all:%%usr   all:%%nice  all:%%sys  ...
    2014-06-22 17:32:11   10.13       0.2       2.3       ...
    2014-06-22 17:32:16   9.45        0.6       3.4       ...
    ...

  2. Lists column labels of CPU metrics.

    $ sadf -- -A sar.out | $0 -l -p '(all|cpu.*):%.*'
    all:%%usr
    all:%%nice
    ...

This command requires awk and GNU date.
EOS
}

# AWK script
awk_script='
BEGIN {
    FS = "\t"
}

# UTC time or local time -> unix time
$3 ~ /[0-9]+-[0-9]+-[0-9]+ [0-9]+:[0-9]+:[0-9]+/ {
    $3 = datetime_to_unixtime($3)
}

{
    time = $3
    label = (host_in_label ? ($1 ":" $4 ":" $5) : ($4 ":" $5))
    metric = $6
}

label ~ label_pattern {
    # Registers the label
    if (! (label in label_set)) {
        label_set[label] = 1
        labels[++ label_count] = label
    }

    if (! lists_labels) {
        # Registers the time
        if (! (time in time_set)) {
            time_set[time] = 1
            times[++ time_count] = time
        }

        # Adds the metric
        metrics[time, label] = metric
    }
}

END {
    if (lists_labels) {
        for (label_index = 1; label_index <= label_count; ++ label_index) {
            print labels[label_index]
        }
    } else {
        # Header row: ex. time %%usr %%nice %%sys...
        str = "time"
        for (label_index = 1; label_index <= label_count; ++ label_index) {
            str = str "\t" labels[label_index]
        }
        print str

        # Following rows: ex. 2014-06-22T21:45:13 10.32 0.8 2.4 ...
        for (time_index = 1; time_index <= time_count; ++ time_index) {
            time = times[time_index]
            str = format_time(time)
            for (label_index = 1; label_index <= label_count; ++ label_index) {
                label = labels[label_index]
                metric = metrics[time, label]
                str = str "\t" metric
            }
            print str
        }
    }
}

# Unix time -> formatted date-time in time_format
function format_time(unixtime,                      date_command, formatted_time) {
    date_command = date_program " -d @" unixtime " +\"" time_format "\""
    if ((date_command | getline formatted_time) <= 0) {
        print ERRNO > "/dev/stderr"
        exit 1
    }
    close(date_command)
    return formatted_time
}

# Normalize date-time to unix time
# This process is needed because sadf may output the time field in various formats.
function datetime_to_unixtime(datetime,         date_command, unixtime) {
    if (datetime in unixtime_cache) {
        return unixtime_cache[datetime]
    } else {
        date_command = date_program " -d \"" datetime "\" +%s"
        if ((date_command | getline unixtime) <= 0) {
            print ERRNO > "/dev/stderr"
            exit 1
        }
        close(date_command)
        unixtime_cache[datetime] = unixtime
        return unixtime
    }
}
'

# Parses options
label_pattern='.*'
time_format='%Y-%m-%dT%H:%M:%S'
row_separator=$'\r\n'
awk_command=awk
date_program=date
host_in_label=0
lists_labels=0

while getopts 'p:t:r:A:D:elh' opt; do
  case $opt in
    p) label_pattern=$OPTARG ;;
    t) time_format=$OPTARG ;;
    r) row_separator=$OPTARG ;;
    A) awk_command=$OPTARG ;;
    D) date_program=$OPTARG ;;
    e) host_in_label=1 ;;
    l) lists_labels=1 ;;
    h) usage; exit 0 ;;
    :|\?) usage >&2; exit 1 ;;
  esac
done

shift $((OPTIND - 1))

# Launches AWK
exec "$awk_command" \
  -v label_pattern="$label_pattern" \
  -v time_format="$time_format" \
  -v ORS="$row_separator" \
  -v date_program="$date_program" \
  -v host_in_label="$host_in_label" \
  -v lists_labels="$lists_labels" \
  "$awk_script" \
  "$@"