Grammalecte  Check-in [70e6105d8a]

Overview
Comment:[core] gc engine: small code clarification
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 70e6105d8a286372a3712b10b3ad2e1c5b31e5b9fac288e5f833fc4b09961057
User & Date: olr on 2018-06-11 09:11:41
Other Links: branch diff | manifest | tags
Context
2018-06-11
09:26
[build][core] named graphs check-in: 7e92a17d42 user: olr tags: core, build, rg
09:11
[core] gc engine: small code clarification check-in: 70e6105d8a user: olr tags: core, rg
2018-06-09
09:46
[core] debug mode check-in: cb932c349b user: olr tags: core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [0afa6afeb1] to [7c7a312e6d].

136
137
138
139
140
141
142
143

144
145
146
147

148
149
150
151
152




153
154
155
156
157
158
159
136
137
138
139
140
141
142

143
144
145
146

147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163







-
+



-
+





+
+
+
+







            dDA.clear()
            try:
                # regex parser
                _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                # token parser
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, True, bContext)
                bChange, errs = oSentence.parse(dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                aErrors.update(errs)
                if bChange:
                    oSentence.rewrite()
                    if True:
                    if bDebug:
                        print("~", oSentence.sSentence)
            except:
                raise
    return aErrors.values() # this is a view (iterable)


_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")

def _getSentenceBoundaries (sText):
    "generator: yield a (start, end) tuple of offsets in <sText> for each match of _zEndOfSentence"
    # the first segment begins where the _zBeginOfParagraph match stops
    nBegin = _zBeginOfParagraph.match(sText).end()
    for mEnd in _zEndOfSentence.finditer(sText):
        nEnd = mEnd.end()
        yield (nBegin, nEnd)
        # the next segment begins exactly where this one stopped
        nBegin = nEnd

374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
378
379
380
381
382
383
384








385
386
387
388
389
390
391







-
-
-
-
-
-
-
-








def _getPath ():
    "return the path made of the folder of this loaded module and <module name>.py"
    # folder is taken from the __file__ attribute of the module registered in sys.modules
    sFolder = os.path.dirname(sys.modules[__name__].__file__)
    sFileName = __name__ + ".py"
    return os.path.join(sFolder, sFileName)



#### common functions

# common regexes
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
_zNextWord = re.compile(r" +(\w[\w-]*)")
_zPrevWord = re.compile(r"(\w[\w-]*) +$")


def option (sOpt):
    "return the value stored in _dOptions for <sOpt>, or False if <sOpt> is not a known option"
    if sOpt in _dOptions:
        return _dOptions[sOpt]
    return False


def displayInfo (dDA, tWord):
462
463
464
465
466
467
468



469
470
471
472
473
474
475
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474







+
+
+










## functions to get text outside pattern scope

# warning: check compile_rules.py to understand how it works

_zNextWord = re.compile(r" +(\w[\w-]*)")
_zPrevWord = re.compile(r"(\w[\w-]*) +$")

def nextword (s, iStart, n):
    "return a tuple (position in <s>, word) for the nth word found from <iStart>, or None if there is no such word"
    # the text at <iStart> must begin with spaces; words before the nth one are skipped
    sPattern = "(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)"
    mWord = re.match(sPattern, s[iStart:])
    if mWord:
        # match offsets are relative to the slice: shift them back into <s>
        return (iStart + mWord.start(1), mWord.group(1))
    return None

563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
562
563
564
565
566
567
568





569
570
571
572
573
574
575







-
-
-
-
-







    return True


#### GRAMMAR CHECKER PLUGINS

${plugins}


#### CALLABLES (generated code)

${callables}



#### TOKEN SENTENCE CHECKER

class TokenSentence:

    def __init__ (self, sSentence, sSentence0, nOffset):
925
926
927
928
929
930
931
932

933





934
919
920
921
922
923
924
925

926
927
928
929
930
931
932
933







-
+

+
+
+
+
+

def g_define (dToken, lMorph):
    "store <lMorph> in <dToken> under the key “lMorph” (replacing any previous value); the result is always True"
    dToken["lMorph"] = lMorph
    return True


#### CALLABLES (generated code)
#### CALLABLES FOR REGEX RULES (generated code)

${callables}


#### CALLABLES FOR GRAPH RULES (generated code)

${graph_callables}