#!/usr/bin/python3
#
# section-parse.py: scan standard input and do the following:
#
# 1. look for a section that begins with a line starting with
#    test-text "STUFF/foo" and ends with a blank line.
# 2. print the "bar.DIGITS" suffix for each line in the section
#    (a body line that does not end in "bar.DIGITS" is printed whole,
#    with surrounding whitespace removed)
#
# Example:
#
#   > cat input.txt
#   test-text "blah blah blah garbage/foo"
#   random crap bar.1
#   random crap bar.2
#   random crap bar.3
#   random crap bar.4
#
#   test-text "blah blah blah garbage/apple"
#   random crap bar.1
#   random crap bar.2
#   random crap bar.3
#   random crap bar.4
#
#   ... (more stuff omitted for brevity) ...
#   > ./section-parse.py < input.txt
#   bar.1
#   bar.2
#   bar.3
#   bar.4
#

# load libraries
import re
import sys
from typing import Iterable, Iterator

# regex to match section header line
# (e.g. a line that looks like this: test-text "GARBAGE/foo")
HEAD = re.compile(r'test-text ".*/foo"')

# regex to match section footer
# (e.g. a blank line or line containing only spaces)
FOOT = re.compile(r'^\s*$')

# regex to match "bar.DIGITS" suffix on body lines
# (used to replace the whole line with just the captured suffix)
BODY = re.compile(r'^.*(bar\.\d+)$')


def section_suffixes(lines: Iterable[str]) -> Iterator[str]:
    """Yield the output line for every body line of each matching section.

    A section opens on a line that HEAD matches and closes on the next
    line that FOOT matches (blank or whitespace-only).  Header and footer
    lines themselves produce no output.

    Args:
        lines: input lines, with or without trailing newlines.

    Yields:
        For each body line, its "bar.DIGITS" suffix; a body line that does
        not end in such a suffix is yielded unchanged apart from having its
        leading and trailing whitespace removed.
    """
    # parsing state: are we in the body of the relevant section?
    in_body = False

    for raw_line in lines:
        # strip the newline plus any leading/trailing whitespace
        line = raw_line.strip()

        if not in_body:
            # outside a section only a header line matters
            if HEAD.match(line):
                in_body = True
        elif FOOT.match(line):
            # blank line closes the section
            in_body = False
        else:
            # in section body: reduce the line to its "bar.DIGITS" suffix
            # (sub() leaves a non-matching line untouched)
            yield BODY.sub(r'\1', line)


def main() -> None:
    """Filter standard input and print one result per section body line."""
    # iterate the stream directly instead of loading all of stdin at once
    for suffix in section_suffixes(sys.stdin):
        print(suffix)


if __name__ == "__main__":
    main()