section-parse.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

#!/usr/bin/python3

#
# section-parse.py: scan standard input and do the following:
#
# 1. look for a section that begins with a 'test-text "STUFF/foo"' line and
#    ends with a blank line.
# 2. print the "bar.DIGITS" suffix for each line in the section
#
# Example:
#
#   > cat input.txt
#   test-text "blah blah blah garbage/foo"
#   random crap bar.1
#   random crap bar.2
#   random crap bar.3
#   random crap bar.4
#   
#   test-text "blah blah blah garbage/apple"
#   random crap bar.1
#   random crap bar.2
#   random crap bar.3
#   random crap bar.4
#
#   ... (more stuff omitted for brevity) ...
#   > ./section-parse.py < input.txt
#   bar.1
#   bar.2
#   bar.3
#   bar.4
#

# load libraries
import re, sys

# regex to match section header line
# (e.g. a line that looks like this: test-text "GARBAGE/foo")
HEAD = re.compile(r'test-text ".*/foo"')

# regex to match section footer
# (e.g. a blank line or line containing only spaces)
FOOT = re.compile(r'^\s*$')

# regex to match "bar.DIGITS" suffix on body lines
# (used to replace the whole line with just that suffix)
BODY = re.compile(r'^.*(bar\.\d+)$')


def extract_suffixes(lines):
    """Yield the "bar.DIGITS" suffix of each body line in matching sections.

    A section starts at a line matching HEAD and ends at the next blank
    (or whitespace-only) line.  Several matching sections may occur in the
    input; lines outside such sections are ignored.

    Args:
        lines: iterable of text lines; surrounding whitespace (including
            the trailing newline) is stripped from each line before matching.

    Yields:
        For every line inside a matching section, the "bar.DIGITS" suffix.
        A body line that does not end in "bar.DIGITS" is yielded unchanged
        (stripped), because the substitution leaves non-matching text as is.
    """
    # parsing state: are we in the body of the relevant section?
    in_body = False

    for raw in lines:
        # strip leading/trailing whitespace, including the newline
        line = raw.strip()

        if not in_body:
            # outside a section: only a header line is of interest
            if HEAD.match(line):
                in_body = True
        elif FOOT.match(line):
            # blank line terminates the current section
            in_body = False
        else:
            # in section body, reduce the line to its "bar.DIGITS" suffix
            yield BODY.sub(r'\1', line)


def main():
    """Read standard input and print one extracted suffix per line."""
    # iterate the stream directly instead of loading it all via readlines()
    for suffix in extract_suffixes(sys.stdin):
        print(suffix)


if __name__ == '__main__':
    main()