471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
|
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
|
-
+
-
+
-
+
|
## functions to get text outside pattern scope
# warning: check compile_rules.py to understand how it works
def nextword (s, iStart, n):
"get the nth word of the input string or empty string"
m = re.match("( +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:])
m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:])
if not m:
return None
return (iStart+m.start(2), m.group(2))
return (iStart+m.start(1), m.group(1))
def prevword (s, iEnd, n):
"get the (-)nth word of the input string or empty string"
m = re.search("([\\w%-]+) +([\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd])
m = re.search("([\\w%-]+) +(?:[\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd])
if not m:
return None
return (m.start(1), m.group(1))
def nextword1 (s, iStart):
"get next word (optimization)"
|