Overview
| Comment: | [build][core] graph parser update |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | build | rg |
| Files: | files | file ages | folders |
| SHA3-256: | 7c742b5359f19425564d3f6ed2713e6b |
| User & Date: | olr on 2018-05-25 20:07:58 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-05-29
| ||
| 16:17 | [core] gc engine update check-in: c06b45b671 user: olr tags: core, rg | |
|
2018-05-25
| ||
| 20:07 | [build][core] graph parser update check-in: 7c742b5359 user: olr tags: core, build, rg | |
| 12:14 | [build][core] tests check-in: ac09d7cc19 user: olr tags: core, build, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [db19c73d82] to [b7c579e4d3].
| ︙ | ︙ | |||
9 10 11 12 13 14 15 | from itertools import chain from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from . import gc_options from ..graphspell.tokenizer import Tokenizer | | | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph, dRule
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
|
| ︙ | ︙ | |||
584 585 586 587 588 589 590 591 592 593 |
self.iStart = iStart
self.lToken = list(_oTokenizer.genTokens(sSentence))
def parse (self):
dErr = {}
lPointer = []
for dToken in self.lToken:
for i, dPointer in enumerate(lPointer):
bValid = False
for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
| > > > > | | > > > > > | > < | > > > > > > > > | | | 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 |
self.iStart = iStart
self.lToken = list(_oTokenizer.genTokens(sSentence))
def parse (self):
    "Parse the sentence's token list against the rule graph; return found errors as a dict."
    dErr = {}
    lPointer = []
    for dToken in self.lToken:
        # check arcs for each existing pointer
        lNewPointer = []
        lKeptPointer = []
        for dPointer in lPointer:
            bValid = False
            bFirst = True
            for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                if bFirst:
                    # advance the pointer in place along the first matching arc
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                else:
                    # fork a new pointer for each additional matching arc
                    # NOTE(review): dPointer["nOffset"] was already overwritten in the
                    # bFirst branch, so forks inherit the NEW offset — confirm intended
                    lNewPointer.append({"nOffset": dPointer["nOffset"], "dNode": dNode})
                bFirst = False
                bValid = True
            # FIX: the original did `del lPointer[i]` while iterating lPointer with
            # enumerate, which skips the following pointer and, after the first
            # deletion, deletes at stale indices. Collect survivors instead.
            if bValid:
                lKeptPointer.append(dPointer)
        lPointer = lKeptPointer
        lPointer.extend(lNewPointer)
        # check arcs of first nodes (a rule may start at any token)
        for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
            lPointer.append({"nOffset": 0, "dNode": dNode})
        # check if there are rules to execute for each pointer
        for dPointer in lPointer:
            if "<rules>" in dPointer["dNode"]:
                # FIX: accumulate errors; the original rebound dErr on each pointer,
                # so only the last pointer's errors were returned.
                dErr.update(self._executeActions(dPointer["dNode"]["<rules>"], dPointer["nOffset"]))
    if dErr:
        print(dErr)  # debug trace, consistent with the prints elsewhere in this module
    return dErr
def _getNextMatchingNodes (self, dToken, dNode):
# token value
if dToken["sValue"] in dNode:
print("value found: ", dToken["sValue"])
yield dGraph[dNode[dToken["sValue"]]]
# token lemmas
if "<lemmas>" in dNode:
for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
if sLemma in dNode["<lemmas>"]:
print("lemma found: ", sLemma)
yield dGraph[dNode["<lemmas>"][sLemma]]
# universal arc
if "*" in dNode:
print("generic arc")
yield dGraph[dNode["*"]]
# regex value arcs
if "<re_value>" in dNode:
for sRegex in dNode["<re_value>"]:
if re.search(sRegex, dToken["sValue"]):
print("value regex matching: ", sRegex)
yield dGraph[dNode["<re_value>"][sRegex]]
# regex morph arcs
if "<re_morph>" in dNode:
for sRegex in dNode["<re_morph>"]:
for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
if re.search(sRegex, sMorph):
print("morph regex matching: ", sRegex)
yield dGraph[dNode["<re_morph>"][sRegex]]
def _executeActions (self, dNode, nOffset):
dErrs = {}
for sLineId, nextNodeKey in dNode.items():
for sArc in dGraph[nextNodeKey]:
print(sArc)
bCondMemo = None
sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
# action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
try:
bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
if bCondMemo:
if cActionType == "-":
# grammar error
print("-")
nErrorStart = self.iStart + self.lToken[eAct[0]]["nStart"]
nErrorEnd = self.iStart + self.lToken[eAct[1]]["nEnd"]
if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
dPriority[nErrorStart] = nPriority
elif cActionType == "~":
# text processor
print("~")
self._rewrite(sWhat, nErrorStart, nErrorEnd)
|
| ︙ | ︙ | |||
663 664 665 666 667 668 669 |
print(">")
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
| | < < < | < < < < | 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 |
print(">")
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
raise Exception(str(e), sLineId)
return dErrs
def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
"text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
lTokenValue = sWhat.split("|")
if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
print("Error. Text processor: number of replacements != number of tokens.")
return
|
| ︙ | ︙ |
Modified gc_lang/fr/rules_graph.grx from [18deb74635] to [6747fdf087].
| ︙ | ︙ | |||
35 36 37 38 39 40 41 | # Fin d’interprétation du fichier avec une ligne commençant par #END # ERREURS COURANTES # http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes | | | | | > | | < | | | | | | | > > > > | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# Fin d’interprétation du fichier avec une ligne commençant par #END
# ERREURS COURANTES
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes
__avoir_confiance_en__
>avoir confiance (dans) [moi|toi|soi|lui|elle|nous|vous|eux|elles]
<<- -1>> en # Avoir confiance en quelqu’un ou quelque chose.|http://grammalecte.net
TEST: Elle avait confiance {{dans}} lui.
__code_legacy__
legacy code
code legacy
<<- -1:2>> code hérité|code reliquat # Anglicisme superflu.
TEST: c’est du {{legacy code}}.
TEST: ce {{code legacy}} est un cauchemar
__être_en_xxxx__
[>être|>rester|>demeurer] an [désaccord|accord]
<<- -2>> en # Confusion. Un an = une année.
TEST: Je suis {{an}} désaccord avec lui.
|
Modified make.py from [b6664e27ed] to [5704755499].
| ︙ | ︙ | |||
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
print(sf, end=", ")
print()
# TEST FILES
with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
hDstPy.write(dVars['regex_gctests'])
hDstPy.write(dVars['graph_gctests'])
createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)
createServerOptions(sLang, dVars)
createPackageZip(sLang, dVars, spLangPack)
#### JAVASCRIPT
| > > > | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
print(sf, end=", ")
print()
# TEST FILES
with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
hDstPy.write("# REGEX RULES\n\n")
hDstPy.write(dVars['regex_gctests'])
hDstPy.write("\n\n\n# GRAPH RULES\n\n")
hDstPy.write(dVars['graph_gctests'])
hDstPy.write("\n")
createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)
createServerOptions(sLang, dVars)
createPackageZip(sLang, dVars, spLangPack)
#### JAVASCRIPT
|
| ︙ | ︙ |