Overview
Comment: [build][core] named graphs
SHA3-256: 7e92a17d420719345291388e13ab89f1
User & Date: olr on 2018-06-11 09:26:03
Context
2018-06-11

13:21  [build][core] graph calls within regex rules (check-in: eeef098bd9, user: olr, tags: core, build, rg)
09:26  [build][core] named graphs (check-in: 7e92a17d42, user: olr, tags: core, build, rg)
09:11  [core] gc engine: small code clarification (check-in: 70e6105d8a, user: olr, tags: core, rg)
Changes
Modified compile_rules_graph.py from [37d848c323] to [f86ee887b1].
︙
dACTIONS = {}
lFUNCTIONS = []


def prepareFunction (s):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(morph|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                           # textarea_chk1(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
    return s


def genTokenLines (sTokenLine):
    "tokenize a string and return a list of lines of tokens"
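For illustration only (not part of the check-in), here is a minimal standalone sketch of what these substitutions do to a rule action string; the helper name _rewrite_action is invented, and only two of the substitutions above are applied.

# Minimal sketch: rewrite a rule action into a call on the token list.
import re

def _rewrite_action (s):
    # same two substitutions as in prepareFunction() above
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
    return s

print(_rewrite_action(r'select(\2, ":V")'))
# expected output: g_select(lToken[2+nTokenOffset], ":V")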
︙
    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    global dDEF
    lTest = []
    lTokenLine = []
    sActions = ""
    nPriority = 4
    dAllGraph = {}
    sGraphName = ""

    for i, sLine in enumerate(lRules, 1):
        sLine = sLine.rstrip()
        if "\t" in sLine:                                   # tabulation not allowed
            print("Error. Tabulation at line: ", i)
            exit()
        if sLine.startswith('#END'):                        # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):                         # comments
            pass
        elif sLine.startswith("GRAPH_NAME: "):              # Graph name
            m = re.match("GRAPH_NAME: +([a-zA-Z_][a-zA-Z_0-9]*)+", sLine.strip())
            if m:
                sGraphName = m.group(1)
                if sGraphName in dAllGraph:
                    print("Error. Group name " + sGraphName + " already exists.")
                    exit()
                dAllGraph[sGraphName] = []
            else:
                print("Error. Graph name not found in", sLine.strip())
                exit()
        elif sLine.startswith("DEF:"):                      # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):                     # test
            lTest.append("g{:<7}".format(i) + " " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):                     # todo
            pass
        elif sLine.startswith("!!"):                        # bookmarks
            m = re.search("^!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith("__") and sLine.endswith("__"):  # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                nPriority = int(m.group(2)[1:]) if m.group(2) else 4
            else:
                print("Error at rule group: ", sLine, " -- line:", i)
                break
        elif re.match("[ ]*$", sLine):                      # empty line to end merging
            if not lTokenLine:
                continue
            if not sActions:
                print("Error. No action found at line:", i)
                exit()
            if not sGraphName:
                print("Error. All rules must belong to a named graph.\nLine: ", i)
                exit()
            for j, sTokenLine in lTokenLine:
                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, sActions, nPriority))
            lTokenLine.clear()
            sActions = ""
            sRuleName = ""
            nPriority = 4
        elif sLine.startswith(("    ")):                    # actions
            sActions += " " + sLine.strip()
        else:
            lTokenLine.append([i, sLine.strip()])

    # tests
    print("  list tests...")
    sGCTests = "\n".join(lTest)
    sGCTestsJS = '{ "aData2": ' + json.dumps(lTest, ensure_ascii=False) + " }\n"

    # processing rules
    print("  preparing rules...")
    for sGraphName, lRuleLine in dAllGraph.items():
        lPreparedRule = []
        for i, sRuleGroup, sTokenLine, sActions, nPriority in lRuleLine:
            for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
                lPreparedRule.append(lRule)
        # Show rules
        for e in lPreparedRule:
            print(e)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, sLang)
        dAllGraph[sGraphName] = oDARG.createGraph()

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition
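As a rough illustration (not part of the check-in): the graph-name pattern above accepts identifier-like names, and the parsing loop then buckets every rule line under the graph that is currently open. The sample rule tuple below is invented.

import re

# The same pattern as in the hunk above: a graph name must look like an identifier.
m = re.match("GRAPH_NAME: +([a-zA-Z_][a-zA-Z_0-9]*)+", "GRAPH_NAME: test_graph")
assert m and m.group(1) == "test_graph"

# Assumed shape of dAllGraph after the parsing loop: graph name -> list of
# (line number, rule group, token line, actions, priority); values are invented.
dAllGraph = {
    "test_graph": [
        (44, "da1", "ne  >donner", " <<- =>> select(\\2, \":V\")", 4)
    ]
}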
︙
    for sActionName, aAction in dACTIONS.items():
        print(sActionName, aAction)

    # Result
    d = { "graph_callables": sPyCallables,
          "graph_gctests": sGCTests,
          "rules_graphs": dAllGraph,
          "rules_actions": dACTIONS }

    return d
Modified gc_core/py/lang_core/gc_engine.py from [7c7a312e6d] to [12b89317cd].
︙
from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dAllGraph, dRule

try:
    # LibreOffice / OpenOffice
    from com.sun.star.linguistic2 import SingleProofreadingError
    from com.sun.star.text.TextMarkupType import PROOFREADING
    from com.sun.star.beans import PropertyValue
    #import lightproof_handler_${implname} as opt
︙
        dDA.clear()
        try:
            # regex parser
            _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
            aErrors.update(errs)
            # token parser
            oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
            bChange, errs = oSentence.parse(dAllGraph["test_graph"], dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
            aErrors.update(errs)
            if bChange:
                oSentence.rewrite()
                if bDebug:
                    print("~", oSentence.sSentence)
        except:
            raise
︙
    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffset = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.createError = self._createWriterError if _bWriterError else self._createDictError

    def _getNextMatchingNodes (self, dToken, dGraph, dNode):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
        # token value
        if dToken["sValue"] in dNode:
            #print("value found: ", dToken["sValue"])
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
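A hypothetical node layout, inferred only from the lookups in _getNextMatchingNodes and not taken from the generated graph itself; all indices and arc labels below are invented.

# Assumed DARG node shape: each arc label maps to the index of the next node in dGraph.
dGraph = {
    0: { "ne": 1 },                        # initial node: exact token value arc
    1: { "<lemmas>": { "donner": 2 },      # lemma arcs (assumed layout)
         "<re_morph>": { ":V": 2 } },      # morphology regex arcs (assumed layout)
    2: { "<rules>": {} },                  # node carrying the actions to execute
}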
︙
                        yield dGraph[dNode["<re_morph>"][sRegex]]
                else:
                    if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        continue
                    if any(re.search(sPattern, sMorph) for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        dErr = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions if not dOptions else dOptions
        lPointer = []
        bChange = False
        for dToken in self.lToken:
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                for dNode in self._getNextMatchingNodes(dToken, dGraph, dPointer["dNode"]):
                    lNextPointer.append({"iToken": dPointer["iToken"], "dNode": dNode})
            lPointer = lNextPointer
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0]):
                lPointer.append({"iToken": dToken["i"], "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    bHasChanged, errs = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    dErr.update(errs)
                    if bHasChanged:
                        bChange = True
        return (bChange, dErr)

    def _executeActions (self, dGraph, dNode, nTokenOffset, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext):
        "execute actions found in the DARG"
        dErrs = {}
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            for sRuleId in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
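A hypothetical walk-through (not part of this check-in) of how parse() advances its pointers over the test sentence "je ne donne rien", assuming the toy graph sketched above; token and node indices are invented.

# "je"    : no arc from dGraph[0] matches        -> lPointer stays empty
# "ne"    : dGraph[0] has a value arc for "ne"   -> lPointer = [{"iToken": 2, "dNode": dGraph[1]}]
# "donne" : lemma arc "donner" from dGraph[1]    -> the pointer advances to dGraph[2];
#           dGraph[2] contains "<rules>"         -> _executeActions() runs its actions
# "rien"  : no matching arc from dGraph[2]       -> the pointer is dropped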
︙
Modified gc_core/py/lang_core/gc_rules_graph.py from [e9a58f5498] to [b99ba93b1b].
# generated code, do not edit

dAllGraph = ${rules_graphs}

dRule = ${rules_actions}
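A hypothetical example (not actual generated output) of what this module could contain once the build substitutes the ${rules_graphs} and ${rules_actions} placeholders; the rule-tuple shape is inferred from the unpacking "sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]" in gc_engine.py, and all names and indices are invented and simplified.

# generated code, do not edit (illustrative content only)
dAllGraph = { "test_graph": { 0: { "ne": 1 }, 1: { "<lemmas>": { "donner": 2 } } } }
dRule = { "da1_1": ("g_c_da1_1", "=", "g_s_da1_1") }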
Modified gc_lang/fr/rules_graph.grx from [7f8d7a1159] to [7ae68d4f85].
︙
# Fin d’interprétation du fichier avec une ligne commençant par #END

# ERREURS COURANTES
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes

GRAPH_NAME: test_graph

__da1__
    ne  >donner
        <<- =>> select(\2, ":V")

TEST: je ne donne rien.
︙