aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--README.md16
-rw-r--r--input.txt17
-rw-r--r--section-parse-insane.py40
-rw-r--r--section-parse-insane.rb35
-rw-r--r--section-parse.c120
-rw-r--r--section-parse.go85
-rwxr-xr-xsection-parse.py62
-rw-r--r--section-parse.rb47
9 files changed, 423 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..09632a3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+section-parse
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1735fc9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+Section Parsing Examples
+========================
+
+Section parsing examples for Alonzo in several languages.
+
+The files in this directory are as follows:
+
+* `input.txt`: example input file.
+* `README.md`: top-level documentation.
+* `section-parse.c`: C section parser. See header comment for
+ build command.
+* `section-parse.go`: Go section parser.
+* `section-parse.py`: Python section parser.
+* `section-parse.rb`: Ruby section parser.
+* `section-parse-insane.py`: Python section parser (insane version).
+* `section-parse-insane.rb`: Ruby section parser (insane version).
diff --git a/input.txt b/input.txt
new file mode 100644
index 0000000..5a278bc
--- /dev/null
+++ b/input.txt
@@ -0,0 +1,17 @@
+test-text "blah blah blah garbage/foo"
+random crap bar.1
+random crap bar.2
+random crap bar.3
+random crap bar.4
+
+test-text "blah blah blah garbage/apple"
+random crap bar.1
+random crap bar.2
+random crap bar.3
+random crap bar.4
+
+test-text "blah blah blah garbage/peach"
+random crap bar.1
+random crap bar.2
+random crap bar.3
+random crap bar.4
diff --git a/section-parse-insane.py b/section-parse-insane.py
new file mode 100644
index 0000000..e7c6d9e
--- /dev/null
+++ b/section-parse-insane.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+
+#
+# section-parse-insane.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with test-text "STUFF/foo" and ends
+# with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+# > cat input.txt
+# test-text "blah blah blah garbage/foo"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# test-text "blah blah blah garbage/apple"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+# > ./section-parse-insane.py < input.txt
+# bar.1
+# bar.2
+# bar.3
+# bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# section and row matches
+S = re.compile('(?m)^test-text "[^"]+/foo"\n((?:^[^\n]*bar\.\d+\n)+)^\s*\n')
+B = re.compile('^.*(bar\.\d+)$')
+
+# read stdin, extract section body, split by line, map rows to suffix,
+# and print to stdout
+print("\n".join([B.sub('\\1', s) for s in S.search(sys.stdin.read())[1].strip().split("\n")]))
diff --git a/section-parse-insane.rb b/section-parse-insane.rb
new file mode 100644
index 0000000..47ee824
--- /dev/null
+++ b/section-parse-insane.rb
@@ -0,0 +1,35 @@
+#!/usr/bin/env ruby
+
+#
+# section-parse-insane.rb: scan standard input and do the following:
+#
+# 1. look for a section that begins with test-text "STUFF/foo" and ends
+# with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+# > cat input.txt
+# test-text "blah blah blah garbage/foo"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# test-text "blah blah blah garbage/apple"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+# > ./section-parse-insane.rb < input.txt
+# bar.1
+# bar.2
+# bar.3
+# bar.4
+#
+
+# regexes: S captures the body of the "/foo" section, B a row's suffix
+S = %r{^test-text "[^"]+/foo"\n((?:^[^\n]*bar\.\d+\n)+)^\s*\n}m
+B = /^.*(bar\.\d+)$/

+# slurp input, take the first section body, and print each row's suffix
+puts(ARGF.read[S, 1].lines.map { |row| row.chomp.sub(B, '\1') })
diff --git a/section-parse.c b/section-parse.c
new file mode 100644
index 0000000..78fda1f
--- /dev/null
+++ b/section-parse.c
@@ -0,0 +1,120 @@
+//
+// section-parse: scan standard input and do the following:
+//
+// 1. look for a section that begins with test-text "STUFF/foo" and ends
+// with a blank line.
+// 2. print the "bar.DIGITS" suffix for each line in the section
+//
+// Build:
+// cc -std=c17 -W -Wall -Wextra -Werror -pedantic -O2 -o section-parse{,.c}
+//
+// Example:
+// > cc -std=c17 -W -Wall -Wextra -Werror -pedantic -O2 -o section-parse{,.c}
+// > ./section-parse < input.txt
+// bar.1
+// bar.2
+// bar.3
+// bar.4
+//
+
+#include <stdlib.h> // exit()
+#include <stdio.h> // fgets(), feof()
+#include <string.h> // memcmp(), strlen()
+#include <stdbool.h> // bool
+
+// section header line prefix
+// (used by is_header(); the length excludes the terminating NUL)
+static const char HEAD_PREFIX[] = "test-text \"";
+static const size_t HEAD_PREFIX_LEN = sizeof(HEAD_PREFIX) - 1;
+
+// section header line suffix, including the trailing newline
+// (used by is_header(); the length excludes the terminating NUL)
+static const char HEAD_SUFFIX[] = "/foo\"\n";
+static const size_t HEAD_SUFFIX_LEN = sizeof(HEAD_SUFFIX) - 1;
+
+/**
+ * Return true if the given line starts with HEAD_PREFIX and ends with
+ * HEAD_SUFFIX (trailing newline included), and false otherwise.
+ */
+static bool is_header(const char * const s) {
+  const size_t n = strlen(s);
+
+  // too short to hold both the prefix and the suffix
+  if (n < HEAD_PREFIX_LEN + HEAD_SUFFIX_LEN) {
+    return false;
+  }
+
+  const char * const tail = s + (n - HEAD_SUFFIX_LEN);
+  return memcmp(s, HEAD_PREFIX, HEAD_PREFIX_LEN) == 0 &&
+    memcmp(tail, HEAD_SUFFIX, HEAD_SUFFIX_LEN) == 0;
+}
+
+/**
+ * Return true if the given line is a section footer, and false
+ * otherwise.
+ *
+ * A footer is a line made up entirely of whitespace: spaces, tabs,
+ * carriage returns, vertical tabs, form feeds, and newlines.  An empty
+ * string also counts as a footer.
+ */
+static bool is_footer(const char * const s) {
+  // bytes allowed on a blank line; this matches the "\s" class used by
+  // the Go, Python, and Ruby parsers, so tab-only and "\r\n" lines are
+  // treated as footers too
+  static const char BLANKS[] = " \t\n\v\f\r";
+
+  // footer iff the leading run of blanks reaches the end of the string
+  return s[strspn(s, BLANKS)] == '\0';
+}
+
+/**
+ * Print the "bar.DIGITS" suffix of the given line to standard output.
+ *
+ * Exits with a non-zero error code if the line is too short or does not
+ * end in "bar." followed by at least one digit.
+ */
+static void print_body_line(const char * const s) {
+  size_t end = strlen(s);
+
+  // check line length
+  if (end < 2) {
+    fprintf(stderr, "short line\n");
+    exit(EXIT_FAILURE);
+  }
+
+  // drop trailing newline, if any, then walk back over the digits
+  if (s[end - 1] == '\n') {
+    end--;
+  }
+  size_t pos = end;
+  while (pos > 0 && s[pos - 1] >= '0' && s[pos - 1] <= '9') {
+    pos--;
+  }
+
+  // require at least one digit, preceded by a literal "bar."
+  if (pos == end || pos < 4 || memcmp(s + pos - 4, "bar.", 4) != 0) {
+    fprintf(stderr, "invalid line: %s", s);
+    exit(EXIT_FAILURE);
+  }
+  // print "bar.DIGITS" and a newline (s + pos - 4 is where "bar." starts)
+  printf("%.*s\n", (int) (end - pos + 4), s + pos - 4);
+}
+
+// Print the "bar.DIGITS" suffix of each body line of the "/foo" section.
+int main(void) {
+  char buf[1024]; // line buffer
+  bool in_body = false; // parse state
+
+  // read lines from standard input; fgets() returns NULL at end of
+  // input, so no feof() test is needed (one would silently drop a
+  // final line that lacks a trailing newline)
+  while (fgets(buf, sizeof(buf), stdin)) {
+    if ((!in_body && is_header(buf)) || (in_body && is_footer(buf))) {
+      in_body = !in_body; // section boundary: toggle state
+    } else if (in_body) {
+      print_body_line(buf); // body line: print its suffix
+    }
+  }
+
+  return 0; // success
+}
diff --git a/section-parse.go b/section-parse.go
new file mode 100644
index 0000000..83c2ed2
--- /dev/null
+++ b/section-parse.go
@@ -0,0 +1,85 @@
+//
+// section-parse.go: scan standard input and do the following:
+//
+// 1. look for a section that begins with test-text "STUFF/foo" and ends
+// with a blank line.
+// 2. print the "bar.DIGITS" suffix for each line in the section
+//
+// Build:
+//
+// go build section-parse.go
+//
+// Alternatively, you can run this script directly like this:
+//
+// go run ./section-parse.go < input.txt
+//
+// Example:
+//
+// > go run ./section-parse.go < input.txt
+// bar.1
+// bar.2
+// bar.3
+// bar.4
+//
+package main
+
+import (
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "regexp"
+ "strings"
+)
+
+// headerRe matches a section header (test-text "STUFF/foo"), footerRe
+// matches a section footer (empty or whitespace-only line), and bodyRe
+// captures the "bar.DIGITS" suffix of a body line as group 1.
+var (
+	headerRe = regexp.MustCompile(`^test-text ".*/foo"$`)
+	footerRe = regexp.MustCompile(`^\s*$`)
+	bodyRe   = regexp.MustCompile(`^.*(bar\.\d+)$`)
+)
+
+// isHeader reports whether s matches headerRe, the section header regex.
+func isHeader(s string) bool {
+ return headerRe.MatchString(s)
+}
+
+// isFooter reports whether s matches footerRe, the section footer regex.
+func isFooter(s string) bool {
+ return footerRe.MatchString(s)
+}
+
+// getLines reads all of standard input and returns it split on "\n".
+// The program is terminated via log.Fatal if the read fails.
+func getLines() []string {
+	data, err := io.ReadAll(os.Stdin)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// a trailing newline yields a final empty element
+	return strings.Split(string(data), "\n")
+}
+
+func main() {
+	// parse state: true while inside the matching section
+	inBody := false
+
+	for _, line := range getLines() {
+		switch {
+		case !inBody && isHeader(line), inBody && isFooter(line):
+			// section boundary: flip state
+			inBody = !inBody
+		case inBody:
+			// extract "bar.DIGITS" suffix, bail out on malformed rows
+			md := bodyRe.FindStringSubmatch(line)
+			if len(md) < 2 {
+				log.Fatalf("invalid body line: %s", line)
+			}
+			// emit the captured suffix
+			fmt.Println(md[1])
+		}
+	}
+}
diff --git a/section-parse.py b/section-parse.py
new file mode 100755
index 0000000..f2633dc
--- /dev/null
+++ b/section-parse.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+#
+# section-parse.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with test-text "STUFF/foo" and ends
+# with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#
+# > cat input.txt
+# test-text "blah blah blah garbage/foo"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# test-text "blah blah blah garbage/apple"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# ... (more stuff omitted for brevity) ...
+# > ./section-parse.py < input.txt
+# bar.1
+# bar.2
+# bar.3
+# bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# regex to match section header line
+# (e.g. a line that looks like this: test-text "GARBAGE/foo"
+HEAD = re.compile('test-text ".*/foo"')
+
+# regex to match section footer
+# (e.g. a blank line or line containing only spaces)
+FOOT = re.compile('^\s*$')
+
+# regex to match "bar.DIGITS" suffix on body lines
+# (used
+BODY = re.compile('^.*(bar\.\d+)$')
+
+# parsing state: are we in the body of the relevant section?
+in_body = False
+
+# read lines from standard input
+for line in sys.stdin.readlines():
+ # strip trailing newline
+ line = line.strip()
+
+ if (not in_body and HEAD.match(line)) or (in_body and FOOT.match(line)):
+ # line is section header or footer, toggle state
+ in_body = not in_body
+ elif in_body:
+ # in section body, match and print "bar.DIGITS" suffix
+ print(BODY.sub('\\1', line))
diff --git a/section-parse.rb b/section-parse.rb
new file mode 100644
index 0000000..0d63f38
--- /dev/null
+++ b/section-parse.rb
@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+
+#
+# section-parse.rb: scan standard input and do the following:
+#
+# 1. look for a section that begins with test-text "STUFF/foo" and ends
+# with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+# > cat input.txt
+# test-text "blah blah blah garbage/foo"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# test-text "blah blah blah garbage/apple"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+# > ./section-parse.rb < input.txt
+# bar.1
+# bar.2
+# bar.3
+# bar.4
+#
+
+# default state
+in_body = false
+
+# read lines
+ARGF.readlines.map { |s| s.strip }.each do |s|
+ # check if either of the following conditions are true:
+ #
+ # 1. we are outside the section and the current line is the matching
+ # section header
+ # 2. we are in the section body and the current line is empty
+ if (!in_body && s =~ /^test-text ".*\/foo"$/) || (in_body && s =~ /^\s*$/)
+ # toggle state
+ in_body = !in_body
+ elsif in_body
+ # print "bar.NUMBERS" line suffix
+ puts s.gsub(/.*(bar\.\d+)$/, '\1')
+ end
+end