#!/usr/bin/env awk -f

# Parse the "title" of a Markdown document.

# Titles longer than this many characters are cut short.
BEGIN { MAX_LENGTH = 500 }

function strip() {
  # Normalise whitespace in the current record ($0), in place:
  # collapse every run of blanks to a single space, then drop the one
  # space that may remain at either end.
  #
  # Tabs and carriage returns count as blanks alongside spaces, so a
  # tab-only line becomes empty and the trailing "\r" of a CRLF file
  # does not end up in the title.
  #
  # Takes no arguments and returns nothing; the result is left in $0.
  gsub(/[ \t\r]+/, " ") # inner runs -> one space
  sub(/^ /, "")         # leading
  sub(/ $/, "")         # trailing
}

# Number the input files 1, 2, 3, ... as the first line of each is read.
FNR == 1 { N++ }

# Setext header underline: a line made only of "=" or only of "-".
# Whatever was collected before it is the title.
# Trailing spaces, tabs or a carriage return after the underline are
# tolerated, because this rule sees the raw line before strip() has
# been applied to it.
/^(=+|-+)[ \t\r]*$/ {
  found[N] = 1
  nextfile
}

# A line that starts with one or more "#" followed by some text.
/^#+[ ]*[^ ]+/ {
  if (title[N] == "") {
    # Nothing has been collected yet, so this is an ATX-style title.
    strip()
    # Drop an optional closing run of "#": "# Title ##" -> "# Title".
    # The run must follow a space, so text such as "C#" is left alone.
    # This happens before the opening marker is removed so that
    # "## ##" ends up as an empty title rather than "##".
    sub(/[ ]+#+$/, "")
    # Drop the opening marker and the spaces after it.
    sub(/^#+[ ]*/, "")
    title[N] = $0
    found[N] = 1
    nextfile
  }

  # Title text has already been collected, so this line is part of
  # that title; fall through to the rules below.
  # Example:
  #
  #    A title running
  #    over a certain
  # -> # of lines
  #    ===============
}

# Give up on this file once the collected text exceeds MAX_LENGTH;
# what has been gathered so far is recorded as the title.
{
  if (length(title[N]) > MAX_LENGTH) {
    found[N] = 1
    nextfile
  }
}

# Tidy up the line's whitespace, then check whether it is blank.
{
  strip()

  # A blank line after some title text ends the title.
  # Blank lines seen before any text change nothing.
  if ($0 == "" && title[N] != "") {
    found[N] = 1
    nextfile
  }
}

# Add the current line to the title, joining lines with one space.
{
  title[N] = (title[N] == "") ? $0 : (title[N] " " $0)
}

END {
  # Print one line per input file, in the order the files were read.
  for (i = 1; i <= N; i++) {
    if (!found[i]) {
      # No title: emit an empty line so that output line i still
      # corresponds to file i. The explicit "" matters: a bare print
      # outputs $0, which in END may still hold the last record read.
      print ""
      continue
    }
    # Over-long titles are cut to MAX_LENGTH characters and marked "...".
    if (length(title[i]) > MAX_LENGTH)
      title[i] = substr(title[i], 1, MAX_LENGTH) "..."
    print title[i]
  }
}