9 files changed, 423 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..09632a3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+section-parse
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1735fc9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+Section Parsing Examples
+========================
+
+Section parsing examples for Alonzo in several languages.
+
+The files in this directory are as follows:
+
+* `input.txt`: example input file.
+* `README.md`: top-level documentation.
+* `section-parse.c`: C section parser.  See header comment for
+  build command.
+* `section-parse.go`: Go section parser.
+* `section-parse.py`: Python section parser.
+* `section-parse.rb`: Ruby section parser.
+* `section-parse-insane.py`: Python section parser (insane version).
+* `section-parse-insane.rb`: Ruby section parser (insane version).
diff --git a/input.txt b/input.txt
new file mode 100644
index 0000000..5a278bc
--- /dev/null
+++ b/input.txt
@@ -0,0 +1,17 @@
+test-text "blah blah blah garbage/foo"
+random crap bar.1
+random crap bar.2
+random crap bar.3
+random crap bar.4
+
+test-text "blah blah blah garbage/apple"
+random crap bar.1
+random crap bar.2
+random crap bar.3
+random crap bar.4
+
+test-text "blah blah blah garbage/peach"
+random crap bar.1
+random crap bar.2
+random crap bar.3
+random crap bar.4
diff --git a/section-parse-insane.py b/section-parse-insane.py
new file mode 100644
index 0000000..e7c6d9e
--- /dev/null
+++ b/section-parse-insane.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+
+#
+# section-parse-insane.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+#    with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#   > cat input.txt
+#   test-text "blah blah blah garbage/foo"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   
+#   test-text "blah blah blah garbage/apple"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   > ./section-parse-insane.py < input.txt
+#   bar.1
+#   bar.2
+#   bar.3
+#   bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# section and row matches
+S = re.compile('(?m)^test-text "[^"]+/foo"\n((?:^[^\n]*bar\.\d+\n)+)^\s*\n')
+B = re.compile('^.*(bar\.\d+)$')
+
+# read stdin, extract section body, split by line, map rows to suffix,
+# and print to stdout
+print("\n".join([B.sub('\\1', s) for s in S.search(sys.stdin.read())[1].strip().split("\n")]))
diff --git a/section-parse-insane.rb b/section-parse-insane.rb
new file mode 100644
index 0000000..47ee824
--- /dev/null
+++ b/section-parse-insane.rb
@@ -0,0 +1,35 @@
+#!/usr/bin/env ruby
+
+#
+# section-parse-insane.rb: scan standard input and do the following:
+#
+# 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+#    with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#   > cat input.txt
+#   test-text "blah blah blah garbage/foo"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   
+#   test-text "blah blah blah garbage/apple"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   > ./section-parse-insane.rb < input.txt
+#   bar.1
+#   bar.2
+#   bar.3
+#   bar.4
+#
+
+# S: whole section with its body captured; B: one row with its suffix captured
+S = %r{^test-text "[^"]+/foo"\n((?:^[^\n]*bar\.\d+\n)+)^\s*\n}m
+B = /^.*(bar\.\d+)$/
+
+# read input, take first matching section body, split by line, map to suffix, print
+puts(ARGF.read.scan(S)[0][0].split(/\n/).map { |s| s.gsub(B, '\1') })
diff --git a/section-parse.c b/section-parse.c
new file mode 100644
index 0000000..78fda1f
--- /dev/null
+++ b/section-parse.c
@@ -0,0 +1,120 @@
+//
+// section-parse: scan standard input and do the following:
+//
+// 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+//    with a blank line.
+// 2. print the "bar.DIGITS" suffix for each line in the section
+//
+// Build:
+//   cc -std=c17 -W -Wall -Wextra -Werror -pedantic -O2 -o section-parse{,.c}
+//
+// Example:
+//   > cc -std=c17 -W -Wall -Wextra -Werror -pedantic -O2 -o section-parse{,.c}
+//   > ./section-parse < input.txt
+//   bar.1
+//   bar.2
+//   bar.3
+//   bar.4
+//
+
+#include <stdlib.h> // exit()
+#include <stdio.h> // fgets(), feof()
+#include <string.h> // memcmp(), strlen()
+#include <stdbool.h> // bool
+
+// section header line prefix
+// (used by is_header())
+static const char HEAD_PREFIX[] = "test-text \"";
+static const size_t HEAD_PREFIX_LEN = sizeof(HEAD_PREFIX) - 1;
+
+// section header line suffix
+// (used by is_header())
+static const char HEAD_SUFFIX[] = "/foo\"\n";
+static const size_t HEAD_SUFFIX_LEN = sizeof(HEAD_SUFFIX) - 1;
+
+/**
+ * Return true if the given line starts with HEAD_PREFIX and ends with
+ * HEAD_SUFFIX (trailing newline included), and false otherwise.
+ */
+static bool is_header(const char * const s) {
+  const size_t len = strlen(s);
+
+  // line must be long enough to hold both the prefix and the suffix
+  if (len < (HEAD_PREFIX_LEN + HEAD_SUFFIX_LEN)) {
+    return false;
+  }
+
+  // compare both ends of the line; memcmp() returns 0 on a match
+  return !memcmp(s, HEAD_PREFIX, HEAD_PREFIX_LEN) &&
+         !memcmp(s + len - HEAD_SUFFIX_LEN, HEAD_SUFFIX, HEAD_SUFFIX_LEN);
+}
+
+/**
+ * Return true if the given line is a section footer, and false
+ * otherwise.
+ *
+ * A footer is a blank line: empty, or made up entirely of whitespace
+ * (space, tab, newline, vertical tab, form feed, carriage return).
+ * This means a tab-only line or a bare "\r\n" also ends the section,
+ * as it does with the `\s*` footer patterns used by the versions of
+ * this parser written in other languages.
+ */
+static bool is_footer(const char * const s) {
+  // length of the leading run of whitespace characters
+  const size_t num_blank = strspn(s, " \t\n\v\f\r");
+
+  // blank only if that run reaches the terminating NUL
+  return s[num_blank] == '\0';
+}
+
+/**
+ * Match ".DIGITS" suffix on given line and print "bar.DIGITS" to standard
+ * output.
+ *
+ * Exits with a non-zero error code if the given line is invalid: too
+ * short, or not ending in a '.' followed by one or more decimal digits.
+ */
+static void print_body_line(const char * const s) {
+  const size_t len = strlen(s);
+
+  // check line length
+  if (len < 2) {
+    fprintf(stderr, "short line\n");
+    exit(EXIT_FAILURE);
+  }
+
+  // skip trailing newline (if any), then walk back over trailing digits
+  const size_t end = (s[len - 1] == '\n') ? len - 1 : len;
+  size_t pos = end;
+  while (pos > 0 && s[pos - 1] >= '0' && s[pos - 1] <= '9') {
+    pos--;
+  }
+
+  // require at least one digit, immediately preceded by a '.'
+  if (pos == end || pos == 0 || s[pos - 1] != '.') {
+    fprintf(stderr, "invalid line: %s", s);
+    exit(EXIT_FAILURE);
+  }
+
+  // print digits (note: match also includes trailing newline, if any)
+  printf("bar.%s", s + pos);
+}
+
+int main(void) {
+  char buf[1024]; // line buffer
+  bool in_body = false; // parse state
+
+  // read lines from standard input; fgets() returns NULL at end of
+  // input, so a final line without a trailing newline is still handled
+  while (fgets(buf, sizeof(buf), stdin)) {
+    if ((!in_body && is_header(buf)) || (in_body && is_footer(buf))) {
+      // toggle state
+      in_body = !in_body;
+    } else if (in_body) {
+      // print body line
+      print_body_line(buf);
+    }
+  }
+
+  return 0; // success
+}
diff --git a/section-parse.go b/section-parse.go
new file mode 100644
index 0000000..83c2ed2
--- /dev/null
+++ b/section-parse.go
@@ -0,0 +1,85 @@
+//
+// section-parse.go: scan standard input and do the following:
+//
+// 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+//    with a blank line.
+// 2. print the "bar.DIGITS" suffix for each line in the section
+//
+// Build:
+//
+//   go build section-parse.go
+//
+// Alternatively, you can run this script directly like this:
+//
+//   go run ./section-parse.go < input.txt
+//
+// Example:
+//
+//   > go run ./section-parse.go < input.txt
+//   bar.1
+//   bar.2
+//   bar.3
+//   bar.4
+//
+package main
+
+import (
+  "fmt"
+  "io"
+  "log"
+  "os"
+  "regexp"
+  "strings"
+)
+
+// section header regex
+var headerRe = regexp.MustCompile(`^test-text ".*/foo"$`)
+
+// section footer regex
+var footerRe = regexp.MustCompile(`^\s*$`)
+
+// body line regex
+var bodyRe = regexp.MustCompile(`^.*(bar\.\d+)$`)
+
+// isHeader reports whether s matches headerRe, i.e. is a section header line.
+func isHeader(s string) bool {
+  return headerRe.MatchString(s)
+}
+
+// isFooter reports whether s matches footerRe, i.e. is empty or all whitespace.
+func isFooter(s string) bool {
+  return footerRe.MatchString(s)
+}
+
+// getLines reads all of standard input and returns it split on "\n".
+func getLines() []string {
+  // read from standard input; exit via log.Fatal on a read error
+  buf, err := io.ReadAll(os.Stdin)
+  if err != nil {
+    log.Fatal(err)
+  }
+
+  // split on newlines (input ending in "\n" yields a final "" element)
+  return strings.Split(string(buf), "\n")
+}
+
+func main() {
+  inBody := false
+
+  // walk lines, tracking whether we are inside the matching section
+  for _, line := range getLines() {
+    if (!inBody && isHeader(line)) || (inBody && isFooter(line)) {
+      // toggle state
+      inBody = !inBody
+    } else if inBody {
+      // match "bar.DIGITS" suffix, check for error
+      md := bodyRe.FindStringSubmatch(line)
+      if len(md) < 2 {
+        log.Fatalf("invalid body line: %s", line)
+      }
+
+      // print match to standard output
+      fmt.Println(md[1])
+    }
+  }
+}
diff --git a/section-parse.py b/section-parse.py
new file mode 100755
index 0000000..f2633dc
--- /dev/null
+++ b/section-parse.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+#
+# section-parse.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+#    with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#
+#   > cat input.txt
+#   test-text "blah blah blah garbage/foo"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   
+#   test-text "blah blah blah garbage/apple"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#
+#   ... (more stuff omitted for brevity) ...
+#   > ./section-parse.py < input.txt
+#   bar.1
+#   bar.2
+#   bar.3
+#   bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# regex to match section header line
+# (e.g. a line that looks like this: test-text "GARBAGE/foo"
+HEAD = re.compile('test-text ".*/foo"')
+
+# regex to match section footer 
+# (e.g. a blank line or line containing only spaces)
+FOOT = re.compile('^\s*$')
+
+# regex to match "bar.DIGITS" suffix on body lines
+# (used 
+BODY = re.compile('^.*(bar\.\d+)$')
+
+# parsing state: are we in the body of the relevant section?
+in_body = False
+
+# read lines from standard input
+for line in sys.stdin.readlines():
+  # strip trailing newline
+  line = line.strip()
+
+  if (not in_body and HEAD.match(line)) or (in_body and FOOT.match(line)):
+    # line is section header or footer, toggle state
+    in_body = not in_body
+  elif in_body:
+    # in section body, match and print "bar.DIGITS" suffix
+    print(BODY.sub('\\1', line))
diff --git a/section-parse.rb b/section-parse.rb
new file mode 100644
index 0000000..0d63f38
--- /dev/null
+++ b/section-parse.rb
@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+
+#
+# section-parse.rb: scan standard input and do the following:
+#
+# 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+#    with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#   > cat input.txt
+#   test-text "blah blah blah garbage/foo"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   
+#   test-text "blah blah blah garbage/apple"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   > ./section-parse.rb < input.txt
+#   bar.1
+#   bar.2
+#   bar.3
+#   bar.4
+#
+
+# parse state: true while inside the matching section (starts outside)
+in_body = false
+
+# read all lines, stripping surrounding whitespace from each
+ARGF.readlines.map { |s| s.strip }.each do |s|
+  # check if either of the following conditions is true:
+  #
+  # 1. we are outside the section and the current line is the matching
+  #    section header
+  # 2. we are in the section body and the current line is blank
+  if (!in_body && s =~ /^test-text ".*\/foo"$/) || (in_body && s =~ /^\s*$/)
+    # toggle state
+    in_body = !in_body
+  elsif in_body
+    # print "bar.DIGITS" line suffix
+    puts s.gsub(/.*(bar\.\d+)$/, '\1')
+  end
+end