Overview
Comment: [build][core] graph parser update
SHA3-256: 7c742b5359f19425564d3f6ed2713e6b
User & Date: olr on 2018-05-25 20:07:58
Context
2018-05-29
  16:17  [core] gc engine update  (check-in: c06b45b671, user: olr, tags: core, rg)
2018-05-25
  20:07  [build][core] graph parser update  (check-in: 7c742b5359, user: olr, tags: core, build, rg)
  12:14  [build][core] tests  (check-in: ac09d7cc19, user: olr, tags: core, build, rg)
Changes
Modified gc_core/py/lang_core/gc_engine.py from [db19c73d82] to [b7c579e4d3].
︙
```python
from itertools import chain

from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph, dRule

__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getSpellChecker", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
            "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
```
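The newly imported `dRule` table is what the parser unpacks per arc (`sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]` in the hunk below, with the accompanying comment describing the action list). The sketch below shows that assumed entry shape with invented values, purely for illustration; it is not the generated `gc_rules_graph` data.

```python
# Assumed shape of one dRule entry, inferred from the unpacking in the parser
# hunk below; the key and the concrete values are invented for illustration.
dRule = {
    "rule_code_legacy_1": (
        "",             # sFuncCond: name of a condition function, empty means always true
        "-",            # cActionType: "-" grammar error, "~" text processor, ">" stop
        "code hérité",  # sWhat: replacement / suggestion / action
        0, 1,           # remaining items (*eAct): token indices and extra error data
    ),
}
```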
︙
```python
        self.iStart = iStart
        self.lToken = list(_oTokenizer.genTokens(sSentence))

    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            # check arcs for each existing pointer
            lNewPointer = []
            for i, dPointer in enumerate(lPointer):
                bValid = False
                bFirst = True
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    if bFirst:
                        dPointer["nOffset"] = dToken["i"]
                        dPointer["dNode"] = dNode
                    else:
                        lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
                    bFirst = False
                    bValid = True
                if not bValid:
                    del lPointer[i]
            lPointer.extend(lNewPointer)
            # check arcs of first nodes
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    dErr = self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"])
        if dErr:
            print(dErr)
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        # token value
        if dToken["sValue"] in dNode:
            print("value found: ", dToken["sValue"])
            yield dGraph[dNode[dToken["sValue"]]]
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    print("lemma found: ", sLemma)
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            print("generic arc")
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if re.search(sRegex, dToken["sValue"]):
                    print("value regex matching: ", sRegex)
                    yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
                    if re.search(sRegex, sMorph):
                        print("morph regex matching: ", sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]

    def _executeActions (self, dNode, nOffset):
        dErrs = {}
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                print(sArc)
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            print("-")
                            nErrorStart = self.iStart + self.lToken[eAct[0]]["nStart"]
                            nErrorEnd = self.iStart + self.lToken[eAct[1]]["nEnd"]
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._rewrite(sWhat, nErrorStart, nErrorEnd)
```
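To make the new traversal easier to follow: the parser keeps one pointer per partial match, advances each pointer through `dGraph` token by token, and starts a fresh pointer at the root node for every token. The snippet below is a minimal, self-contained sketch of that idea over a toy two-token pattern; the node layout, sample graph, and helper name are assumptions made for this example, not the compiled `gc_rules_graph` data or the module's actual code.

```python
# Toy graph, assumed layout: each node maps an arc label (a plain token value,
# "*" for any token, or "<rules>" for a final action) to the index of the next
# node in dGraph. This is NOT the real compiled graph, just a sketch.
dGraph = {
    0: {"legacy": 1, "*": 3},
    1: {"code": 2},
    2: {"<rules>": "code_legacy"},
    3: {},
}

def getNextMatchingNodes (sValue, dNode):
    "yield every node reachable from <dNode> with the token value <sValue>"
    if sValue in dNode:        # exact value arc
        yield dGraph[dNode[sValue]]
    if "*" in dNode:           # universal arc
        yield dGraph[dNode["*"]]

lPointer = []
for sValue in "du legacy code".split():
    lNewPointer = []
    # advance every existing pointer with the current token
    for dPointer in lPointer:
        lNewPointer.extend({"dNode": dNode} for dNode in getNextMatchingNodes(sValue, dPointer["dNode"]))
    # every token may also start a new match at the root node
    lNewPointer.extend({"dNode": dNode} for dNode in getNextMatchingNodes(sValue, dGraph[0]))
    lPointer = lNewPointer
    for dPointer in lPointer:
        if "<rules>" in dPointer["dNode"]:
            print("match ending on", repr(sValue), "->", dPointer["dNode"]["<rules>"])
```

Running it prints one match ending on "code", which is the point of the pointer list: overlapping candidate matches are kept alive in parallel without backtracking.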
︙
```python
                            print(">")
                            pass
                        else:
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId)
        return dErrs

    def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
        "text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
        lTokenValue = sWhat.split("|")
        if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
            print("Error. Text processor: number of replacements != number of tokens.")
            return
```
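The `_rewrite` hunk stops at the length check; the rest of the method lies outside this diff. As a rough, standalone illustration of what a token-level text processor of this kind does, the sketch below splits the replacement string on `|` and overwrites one token value per piece. The helper name and the token structure are assumptions for this example, not the method's actual continuation.

```python
def rewriteTokens (lToken, sWhat, nStart, nEnd):
    "toy sketch: replace the values of tokens nStart..nEnd with the |-separated parts of <sWhat>"
    lTokenValue = sWhat.split("|")
    if len(lTokenValue) != (nEnd - nStart + 1):
        print("Error. Text processor: number of replacements != number of tokens.")
        return
    for i, sValue in zip(range(nStart, nEnd + 1), lTokenValue):
        lToken[i]["sValue"] = sValue

lToken = [{"sValue": s} for s in ["code", "legacy"]]
rewriteTokens(lToken, "code|hérité", 0, 1)
print([d["sValue"] for d in lToken])   # ['code', 'hérité']
```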
︙
Modified gc_lang/fr/rules_graph.grx from [18deb74635] to [6747fdf087].
︙
```
# Fin d’interprétation du fichier avec une ligne commençant par #END

# ERREURS COURANTES
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes

__avoir_confiance_en__
    >avoir confiance (dans) [moi|toi|soi|lui|elle|nous|vous|eux|elles]
        <<- -1>> en                                 # Avoir confiance en quelqu’un ou quelque chose.|http://grammalecte.net

TEST: Elle avait confiance {{dans}} lui.

__code_legacy__
    legacy code
    code legacy
        <<- -1:2>> code hérité|code reliquat        # Anglicisme superflu.

TEST: c’est du {{legacy code}}.
TEST: ce {{code legacy}} est un cauchemar

__être_en_xxxx__
    [>être|>rester|>demeurer] an [désaccord|accord]
        <<- -2>> en                                 # Confusion. Un an = une année.

TEST: Je suis {{an}} désaccord avec lui.
```
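In these rules, a bracketed group such as `[>être|>rester|>demeurer]` lists the alternatives accepted for one token slot, and the `<<- ... >>` line carries the action applied when the whole pattern matches. As a rough picture of how one pattern line fans out into concrete token sequences, here is a small expansion sketch; the bracket parsing below is an assumption made for illustration, not the project's actual rule compiler.

```python
from itertools import product

def expandPattern (sPattern):
    "expand a bracketed token pattern into every concrete token sequence it covers (toy sketch)"
    lSlots = []
    for sSlot in sPattern.split():
        if sSlot.startswith("[") and sSlot.endswith("]"):
            lSlots.append(sSlot[1:-1].split("|"))   # alternatives for this slot
        else:
            lSlots.append([sSlot])                  # literal token
    return [" ".join(lSeq) for lSeq in product(*lSlots)]

for sSeq in expandPattern("[>être|>rester|>demeurer] an [désaccord|accord]"):
    print(sSeq)   # 6 sequences, e.g. ">être an désaccord"
```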
Modified make.py from [b6664e27ed] to [5704755499].
︙
```python
            helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
            print(sf, end=", ")
    print()

    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.write("# REGEX RULES\n\n")
        hDstPy.write(dVars['regex_gctests'])
        hDstPy.write("\n\n\n# GRAPH RULES\n\n")
        hDstPy.write(dVars['graph_gctests'])
        hDstPy.write("\n")

    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)

    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)

    #### JAVASCRIPT
```
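The added `hDstPy.write` calls give the generated `grammalecte/<lang>/gc_test.txt` separate headers for regex-rule tests and graph-rule tests. The snippet below reproduces the same sequence of writes with placeholder test lines (the real content comes from `dVars['regex_gctests']` and `dVars['graph_gctests']`), just to show the resulting file layout.

```python
# Placeholder values; in make.py these come from the compiled rule sources.
dVars = {
    "regex_gctests": "TEST: placeholder regex-rule test",
    "graph_gctests": "TEST: placeholder graph-rule test",
}
sLang = "fr"

sText = "# TESTS FOR LANG [" + sLang + "]\n\n"
sText += "# REGEX RULES\n\n" + dVars["regex_gctests"]
sText += "\n\n\n# GRAPH RULES\n\n" + dVars["graph_gctests"] + "\n"
print(sText)
```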
︙