initial commit

author: Paul Duncan <pabs@pablotron.org> 2023-03-02 17:09:02 -0500
committer: Paul Duncan <pabs@pablotron.org> 2023-03-02 17:09:02 -0500
commit: 14419f066973d26192bea9b97707bd872af772c7 (patch)
tree: e32224d068e8df8f387127a10efa3721d4a269d8 /section-parse.py
download: section-parse-examples-14419f066973d26192bea9b97707bd872af772c7.tar.bz2
section-parse-examples-14419f066973d26192bea9b97707bd872af772c7.zip
1 files changed, 62 insertions, 0 deletions
diff --git a/section-parse.py b/section-parse.py
new file mode 100755
index 0000000..f2633dc
--- /dev/null
+++ b/section-parse.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+#
+# section-parse.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with test-text "STUFF/foo" and ends
+#    with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#
+#   > cat input.txt
+#   test-text "blah blah blah garbage/foo"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#   
+#   test-text "blah blah blah garbage/apple"
+#   random crap bar.1
+#   random crap bar.2
+#   random crap bar.3
+#   random crap bar.4
+#
+#   ... (more stuff omitted for brevity) ...
+#   > ./section-parse.py < input.txt
+#   bar.1
+#   bar.2
+#   bar.3
+#   bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# regex to match section header line
+# (e.g. a line that looks like this: test-text "GARBAGE/foo")
+HEAD = re.compile(r'test-text ".*/foo"')
+
+# regex to match section footer
+# (e.g. a blank line or line containing only whitespace)
+FOOT = re.compile(r'^\s*$')
+
+# regex to match "bar.DIGITS" suffix on body lines
+# (group 1 captures the suffix; raw strings keep "\s", "\.", "\d" intact)
+BODY = re.compile(r'^.*(bar\.\d+)$')
+
+# parsing state: are we in the body of the relevant section?
+in_body = False
+
+# stream lines from standard input (no need to buffer the whole input)
+for line in sys.stdin:
+  # strip leading/trailing whitespace (including the trailing newline)
+  line = line.strip()
+
+  if (not in_body and HEAD.match(line)) or (in_body and FOOT.match(line)):
+    # line is section header or footer, toggle state
+    in_body = not in_body
+  elif in_body:
+    # in section body: print "bar.DIGITS" suffix (non-matching lines print as-is)
+    print(BODY.sub(r'\1', line))
author	Paul Duncan <pabs@pablotron.org>	2023-03-02 17:09:02 -0500
committer	Paul Duncan <pabs@pablotron.org>	2023-03-02 17:09:02 -0500
commit	14419f066973d26192bea9b97707bd872af772c7 (patch)
tree	e32224d068e8df8f387127a10efa3721d4a269d8 /section-parse.py
download	section-parse-examples-14419f066973d26192bea9b97707bd872af772c7.tar.bz2 section-parse-examples-14419f066973d26192bea9b97707bd872af772c7.zip