#!/usr/bin/env awk -f

# Parse the "title" of a Markdown document.
#
# For every input file that contains at least one record, one line is
# printed in END: the title that was found, or an empty line when none was.
# A title is either
#   - an ATX header ("# Title") seen before any other title text, or
#   - the leading run of non-empty lines, joined with single spaces and
#     terminated by a Setext underline ("===" / "---"), by an empty line,
#     or by growing past MAX_LENGTH characters.
#
# The rules below are order-dependent: each record is tested against them
# from top to bottom, and `nextfile` abandons the current file as soon as
# the title is settled.

BEGIN {
	# We trim very long "titles".
	MAX_LENGTH = 500
}

# Normalise whitespace in the current record ($0): collapse every run of
# spaces/tabs into one space, then drop a single leading and trailing space.
function strip() {
	gsub(/[ \t]+/, " ")  # inner
	sub(/^ /, "")        # leading
	sub(/ $/, "")        # trailing
}

# Compute file ordinal. N indexes title[] and found[].
FNR == 1 {
	N += 1
}

# Setext header marker: a line made only of "=" or only of "-".
# Whatever came before was the title.
/^(=+|-+)$/ {
	found[N] = 1
	nextfile
}

# A line starting with "#" followed by some non-blank text.
/^#+[ \t]*[^ \t]+/ {
	if (title[N] == "") {
		# Nothing collected yet, so this is an ATX-style title.
		strip()
		sub(/^#+ */, "")
		title[N] = $0
		found[N] = 1
		nextfile
	}
	# Otherwise this line is part of a multi-line Setext title and falls
	# through to the rules below. Example document:
	#
	#     A title running
	#     over a certain
	#     # of lines
	#     ===============
}

# Stop if the title has become suspiciously long.
# It is truncated to MAX_LENGTH when printed.
length(title[N]) > MAX_LENGTH {
	found[N] = 1
	nextfile
}

# From here on, work with the whitespace-normalised record.
{
	strip()
}

# Empty (or blank-only) line.
$0 == "" {
	if (title[N] != "") {
		# Whatever came before was the title.
		found[N] = 1
		nextfile
	}
	# Only empty lines so far. The rule below runs as well, but it leaves
	# title[N] empty because $0 is "".
}

# Consume more of the title.
{
	if (title[N] == "") {
		title[N] = $0
	} else {
		title[N] = title[N] " " $0
	}
}

END {
	# Print titles in order, one per line.
	for (i = 1; i <= N; i++) {
		if (!found[i]) {
			# No title: print an explicit empty line. A bare `print` would
			# output $0, which in END still holds the last record read.
			print ""
			continue
		}
		if (length(title[i]) > MAX_LENGTH)
			title[i] = substr(title[i], 1, MAX_LENGTH) "..."
		print title[i]
	}
}