aboutsummaryrefslogtreecommitdiff
path: root/section-parse.py
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2023-03-02 17:09:02 -0500
committerPaul Duncan <pabs@pablotron.org>2023-03-02 17:09:02 -0500
commit14419f066973d26192bea9b97707bd872af772c7 (patch)
treee32224d068e8df8f387127a10efa3721d4a269d8 /section-parse.py
downloadsection-parse-examples-14419f066973d26192bea9b97707bd872af772c7.tar.bz2
section-parse-examples-14419f066973d26192bea9b97707bd872af772c7.zip
initial commit
Diffstat (limited to 'section-parse.py')
-rwxr-xr-xsection-parse.py62
1 files changed, 62 insertions, 0 deletions
diff --git a/section-parse.py b/section-parse.py
new file mode 100755
index 0000000..f2633dc
--- /dev/null
+++ b/section-parse.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python3
+
+#
+# section-parse.py: scan standard input and do the following:
+#
+# 1. look for a section that begins with 'test-text "STUFF/foo"' and ends
+# with a blank line.
+# 2. print the "bar.DIGITS" suffix for each line in the section
+#
+# Example:
+#
+# > cat input.txt
+# test-text "blah blah blah garbage/foo"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# test-text "blah blah blah garbage/apple"
+# random crap bar.1
+# random crap bar.2
+# random crap bar.3
+# random crap bar.4
+#
+# ... (more stuff omitted for brevity) ...
+# > ./section-parse.py < input.txt
+# bar.1
+# bar.2
+# bar.3
+# bar.4
+#
+
+# load libraries
+import re
+import sys
+
+# regex to match section header line
+# (e.g. a line that looks like this: test-text "GARBAGE/foo")
+HEAD = re.compile(r'test-text ".*/foo"')
+
+# regex to match section footer
+# (e.g. a blank line or line containing only spaces)
+FOOT = re.compile(r'^\s*$')
+
+# regex to match "bar.DIGITS" suffix on body lines
+# (used to replace the whole line with just the captured suffix)
+BODY = re.compile(r'^.*(bar\.\d+)$')
+
+# parsing state: are we in the body of the relevant section?
+in_body = False
+
+# stream lines from standard input (no need to buffer all of it first)
+for line in sys.stdin:
+  # strip surrounding whitespace (incl. newline) so "$" in BODY anchors
+  line = line.strip()
+
+  if (not in_body and HEAD.match(line)) or (in_body and FOOT.match(line)):
+    # line is section header or footer, toggle state
+    in_body = not in_body
+  elif in_body:
+    # in body: print "bar.DIGITS" suffix (lines without one print as-is)
+    print(BODY.sub(r'\1', line))