Last active
November 24, 2015 17:18
-
-
Save gpfreitas/eac1a25c983e832b1f03 to your computer and use it in GitHub Desktop.
Histogram for integer x and y values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# hist.awk - Histogram for integer x and y values
#
# This AWK program takes as input a sequence of x, y integer values, one pair
# per row, where x is the bin and y is the count of values in that bin. In
# other words, the input already encodes the histogram (think of the output
# of uniq -c), so this script only pretty-prints that histogram to the
# screen. Furthermore, we assume that the input rows are sorted by the bin
# values (the first column) and that the counts in the second column are
# always nonnegative.
#
# For each row of input this program prints a horizontal bar, labeled by x,
# whose length is proportional to (y - minVal), where minVal is the minimum y
# in the data. Periodically, we print the y-value at the end (top) of the
# bar. Set the parameters "label_period" and "label_offset" in the BEGIN
# section to change where and how often the y-value gets printed.
#
# The min/max values and the total are printed to STDERR at the end. Because
# of that, you can suppress them by redirecting STDERR to /dev/null, or you
# can obtain a numbering of the histogram bins by piping through "cat -n".
BEGIN {
    # User-configurable parameters.
    #
    # Each parameter receives its default only when it is still unset, so it
    # can be overridden on the command line with "-v name=value" (awk applies
    # -v assignments before BEGIN runs) instead of editing this file. An
    # unset awk variable compares equal to the empty string, which is what
    # the tests below rely on.
    if (bin == "")          bin = 1           # field number read as the bin (x)
    if (val == "")          val = 2           # field number read as the count (y)
    if (maxHeight == "")    maxHeight = 120   # divisor used to derive the bar scale
    if (block == "")        block = "="       # string printed for each bar unit
    if (label_period == "") label_period = 7  # label every label_period-th row ...
    if (label_offset == "") label_offset = 0  # ... starting at this 0-based row offset

    # Seeds for the running statistics. Do not change these unless you know
    # what you are doing: minVal starts high so that smaller counts replace
    # it, and maxVal starts at 0 because counts are assumed to be nonnegative.
    minVal = 2^31
    maxVal = 0
}
# Per-record rule: remember the row and fold its count into the statistics.
{
    # Collect the data in arrays indexed by record number. The count is
    # forced to a number ("+ 0") so that the comparisons below are always
    # numeric, even when the field is empty or not purely numeric.
    x[NR] = $bin
    y[NR] = $val + 0

    # Update stats. The first record seeds both extremes, so the result does
    # not depend on the initial values of minVal and maxVal being "large
    # enough" or "small enough" for the data.
    if (NR == 1 || y[NR] < minVal) minVal = y[NR]
    if (NR == 1 || y[NR] > maxVal) maxVal = y[NR]
    total += y[NR]
}
END {
    # Render one bar per input row on STDOUT, then report the minimum,
    # maximum and total on STDERR (through "cat 1>&2").

    # Nothing was read: the statistics would only show their seed values.
    if (NR == 0) {
        printf "No input data\n" | "cat 1>&2"
        exit
    }

    # Size of one block/unit in the bar, in y-units. The integer division
    # yields 0 whenever (maxVal - minVal) < maxHeight, which would make the
    # drawing loop below spin forever, so the scale is clamped to at least 1.
    # Because the division rounds down, the longest bar can still be longer
    # than maxHeight blocks.
    scale = int((maxVal - minVal) / maxHeight)
    if (scale < 1) scale = 1

    # Print the histogram bars.
    for (i = 1; i <= NR; i++) {
        printf "%10d ", x[i]

        # Draw using a working copy so that y[i] keeps the real count for
        # the label printed after the bar.
        for (remaining = y[i]; remaining > minVal; remaining -= scale)
            printf "%s", block

        # "%'d" uses the printf ' flag (thousands grouping where the awk
        # implementation and locale support it). If this program is pasted
        # inside a single-quoted shell string instead of being run from a
        # file, that quote has to be written as '\'' there.
        if ((i - 1) % label_period == label_offset)
            printf " %'d", y[i]
        printf "\n"
    }

    # Print stats.
    printf "Minimum value: %'d\n", minVal | "cat 1>&2"
    printf "Maximum value: %'d\n", maxVal | "cat 1>&2"
    printf "Total: %'d\n", total | "cat 1>&2"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment