From 14419f066973d26192bea9b97707bd872af772c7 Mon Sep 17 00:00:00 2001
From: Paul Duncan <pabs@pablotron.org>
Date: Thu, 2 Mar 2023 17:09:02 -0500
Subject: initial commit

---
 section-parse.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100755 section-parse.py

(limited to 'section-parse.py')

diff --git a/section-parse.py b/section-parse.py
new file mode 100755
index 0000000..f2633dc
--- /dev/null
+++ b/section-parse.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+#
+# section-parse.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with test-text "STUFF/foo" and ends
+#    with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#
+#   > cat input.txt
+#   test-text "blah blah blah garbage/foo"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   
+#   test-text "blah blah blah garbage/apple"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#
+#   ... (more stuff omitted for brevity) ...
+#   > ./section-parse.py < input.txt
+#   bar.1
+#   bar.2
+#   bar.3
+#   bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# regex to match section header line
+# (e.g. a line that looks like this: test-text "GARBAGE/foo")
+HEAD = re.compile(r'test-text ".*/foo"')
+
+# regex to match section footer
+# (e.g. a blank line or line containing only spaces)
+FOOT = re.compile(r'^\s*$')
+
+# regex to match "bar.DIGITS" suffix on body lines
+# (used to reduce each body line to just that suffix)
+BODY = re.compile(r'^.*(bar\.\d+)$')
+
+# parsing state: are we in the body of the relevant section?
+in_body = False
+
+# stream lines from standard input (no need to buffer the whole input)
+for line in sys.stdin:
+  # strip leading/trailing whitespace, including the trailing newline
+  line = line.strip()
+
+  if (not in_body and HEAD.match(line)) or (in_body and FOOT.match(line)):
+    # line is section header or footer, toggle state
+    in_body = not in_body
+  elif in_body:
+    # reduce line to its "bar.DIGITS" suffix; lines without one print as-is
+    print(BODY.sub(r'\1', line))
-- 
cgit v1.2.3