Overview
| Comment: | [core] gc engine: jump action to keep pointers until token found |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | core | rg |
| Files: | files | file ages | folders |
| SHA3-256: |
043d1fdd770fbb57f03a3cabf4a87621 |
| User & Date: | olr on 2018-07-31 09:57:25 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-07-31
| ||
| 12:27 | [core] gc engine: use pointer as parameter instead of several values from pointer check-in: 05df35c320 user: olr tags: core, rg | |
| 09:57 | [core] gc engine: jump action to keep pointers until token found check-in: 043d1fdd77 user: olr tags: core, rg | |
| 09:28 | [core] gc engine: generator yield pointers instead of nodes check-in: 746cb57e54 user: olr tags: core, rg | |
Changes
Modified gc_core/py/lang_core/gc_engine.py from [e17d8732c9] to [e0f46ccafe].
| ︙ | ︙ | |||
601 602 603 604 605 606 607 |
dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
self.lToken = lNewToken
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
if bDebug:
print("UPDATE:")
print(self)
| | > > > > > > > > > > > > > > > > > > > > > > | | ||
dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
self.lToken = lNewToken
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken if dToken["sType"] != "INFO" }
if bDebug:
print("UPDATE:")
print(self)
def _getNextPointers (self, dToken, dGraph, iNode1, dNode, bKeep=False, bDebug=False):
    """Generator: yield a pointer for every arc of <dNode> that <dToken> matches.

    A pointer is a dict {"iNode1": <iNode1>, "dNode": <target node>}; the target
    node is looked up in <dGraph> with the value stored on the matching arc.

    Parameters:
        dToken  -- token dict; "sValue" and "sType" are always read,
                   "lMorph" and "tags" are used when present
        dGraph  -- container indexed by the values stored on arcs
        iNode1  -- copied unchanged into every yielded pointer
        dNode   -- node whose arcs are tested: plain token values are direct
                   keys, the other arc kinds live under "<re_value>",
                   "<lemmas>", "<re_morph>", "<tags>", "<meta>" and "<>"
        bKeep   -- if True and no arc of <dNode> matched, yield a pointer that
                   stays on <dNode> and carries "bKeep": True
        bDebug  -- if True, print every match

    If <dNode> has a "<>" (jump) arc, the pointers of the jump target are
    yielded too, computed recursively with bKeep=True.
    """
    bTokenFound = False
    # token value
    if dToken["sValue"] in dNode:
        if bDebug:
            print(" MATCH:", dToken["sValue"])
        yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] }
        bTokenFound = True
    if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        sValue = dToken["sValue"].lower()
        if sValue in dNode:
            if bDebug:
                print(" MATCH:", sValue)
            yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
            bTokenFound = True
    elif dToken["sValue"].isupper():
        # all-caps token: try the lowercase form, then the capitalized form
        sValue = dToken["sValue"].lower()
        if sValue in dNode:
            if bDebug:
                print(" MATCH:", sValue)
            yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
            bTokenFound = True
        sValue = dToken["sValue"].capitalize()
        if sValue in dNode:
            if bDebug:
                print(" MATCH:", sValue)
            yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
            bTokenFound = True
    # regex value arcs
    if "<re_value>" in dNode:
        for sRegex in dNode["<re_value>"]:
            if "¬" not in sRegex:
                # no anti-pattern
                if re.search(sRegex, dToken["sValue"]):
                    if bDebug:
                        print(" MATCH: ~" + sRegex)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                    bTokenFound = True
            else:
                # there is an anti-pattern: <pattern>¬<negative pattern>
                sPattern, sNegPattern = sRegex.split("¬", 1)
                if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                    continue
                # an empty <sPattern> accepts any value the anti-pattern did not reject
                if not sPattern or re.search(sPattern, dToken["sValue"]):
                    if bDebug:
                        print(" MATCH: ~" + sRegex)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                    bTokenFound = True
    # analysable tokens
    if dToken["sType"][0:4] == "WORD":
        # token lemmas
        if "<lemmas>" in dNode:
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    if bDebug:
                        print(" MATCH: >" + sLemma)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<lemmas>"][sLemma]] }
                    bTokenFound = True
        # regex morph arcs
        if "<re_morph>" in dNode:
            # The morphologies do not depend on the arc: fetch them once, and ask
            # the spellchecker only when the token carries no "lMorph" of its own.
            # NOTE(review): assumes getMorph() has no side effect worth repeating — confirm.
            lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
            for sRegex in dNode["<re_morph>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if any(re.search(sRegex, sMorph) for sMorph in lMorph):
                        if bDebug:
                            print(" MATCH: @" + sRegex)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                        bTokenFound = True
                else:
                    # there is an anti-pattern: <pattern>¬<negative pattern>
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern == "*":
                        # all morphologies must match with <sPattern>
                        if sPattern and lMorph and all(re.search(sPattern, sMorph) for sMorph in lMorph):
                            if bDebug:
                                print(" MATCH: @" + sRegex)
                            yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                            bTokenFound = True
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph) for sMorph in lMorph):
                            continue
                        # an empty <sPattern> accepts any token the anti-pattern did not reject
                        if not sPattern or any(re.search(sPattern, sMorph) for sMorph in lMorph):
                            if bDebug:
                                print(" MATCH: @" + sRegex)
                            yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                            bTokenFound = True
    # token tags
    if "tags" in dToken and "<tags>" in dNode:
        for sTag in dToken["tags"]:
            if sTag in dNode["<tags>"]:
                if bDebug:
                    print(" MATCH: /" + sTag)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] }
                bTokenFound = True
    # meta arc (for token type)
    if "<meta>" in dNode:
        for sMeta in dNode["<meta>"]:
            # no regex here, we just search if <dToken["sType"]> exists within <sMeta>
            if sMeta == "*":
                if bDebug:
                    print(" MATCH: *" + sMeta)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"]["*"]] }
                bTokenFound = True
            elif "¬" in sMeta:
                # negated meta arc: matches when the token type does not occur in <sMeta>
                if dToken["sType"] not in sMeta:
                    if bDebug:
                        print(" MATCH: *" + sMeta)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                    bTokenFound = True
            elif dToken["sType"] in sMeta:
                if bDebug:
                    print(" MATCH: *" + sMeta)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                bTokenFound = True
    # nothing matched: keep the pointer on this node so the next token can be tried
    if bKeep and not bTokenFound:
        yield { "iNode1": iNode1, "dNode": dNode, "bKeep": True }
    # JUMP
    # Warning! Recursion!
    if "<>" in dNode:
        # <self> is already bound by the attribute access: passing it again would
        # shift every argument by one and raise a TypeError.
        yield from self._getNextPointers(dToken, dGraph, iNode1, dGraph[dNode["<>"]], True, bDebug)
def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
"parse tokens from the text and execute actions encountered"
dOpt = _dOptions if not dOptions else dOptions
lPointer = []
bTagAndRewrite = False
for iToken, dToken in enumerate(self.lToken):
if bDebug:
print("TOKEN:", dToken["sValue"])
# check arcs for each existing pointer
lNextPointer = []
for dPointer in lPointer:
lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer["iNode1"], dPointer["dNode"], dPointer.get("bKeep", False), bDebug))
lPointer = lNextPointer
# check arcs of first nodes
lPointer.extend(self._getNextPointers(dToken, dGraph, iToken, dGraph[0], False, bDebug))
# check if there is rules to check for each pointer
for dPointer in lPointer:
#if bDebug:
# print("+", dPointer)
if "<rules>" in dPointer["dNode"]:
bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
if bChange:
|
| ︙ | ︙ |
Modified gc_lang/fr/rules.grx from [53b10f9b88] to [2e0ff9f9b7].
| ︙ | ︙ | |||
11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 |
<<- /conf/ -1>> =\1.replace("escell", "écel").replace("essell", "écel")
# Confusion probable si ce mot se rapporte à “\3 \4”. Desceller signifie briser un sceau, un cachet… Desseller signifie ôter une selle.|http://fr.wiktionary.org/wiki/déceler
[>desseller|>desceller] [l’|@:[BD]] [>acidité|>activité|>allergie|>anévrisme|>anomalie|>arnaque|>appendicite|>atrophie|>baisse|>bébé|>blessure|>bug|>bogue|>carie|>cancer|>cause|>changement|>complot|>comète|>concentration|>corrélation|>croissance|>défaut|>défaillance|>demande|>dépression|>diabète|>différence|>diminution|>effluve|>épilepsie|>erreur|>essai|>existence|>grossesse|>grosseur|>faille|>faute|>fuite|>fraude|>grippe|>handicap|>hausse|>hémorragie|>hostilité|>hypertrophie|>incompatibilité|>incohérence|>infection|>infraction|>indice|>infidélité|>insuffisance|>intrigue|>irrégularité|>leucémie|>lésion|>lueur|>lumière|>maladie|>malformation|>manœuvre|>manipulation|>molécule|>mensonge|>mutation|>once|>perturbation|>personnalité|>piste|>perte|>planète|>exoplanète|>présence|>qualité|>odeur|>opportunité|>otite|>problème|>surdité|>talent|>tendance|>tentative|>tumeur|>utilisation|hoax|>variation|>vie|virus]
<<- /conf/ -1>> =\1.replace("escell", "écel").replace("essell", "écel")
# Confusion probable si ce mot se rapporte à “\3”. Desceller signifie briser un sceau, un cachet… Desseller signifie ôter une selle.|http://fr.wiktionary.org/wiki/déceler
TEST: il y a une erreur qu’on peut {{desceller}} dans ses analyses.
TEST: elle a {{dessellé}} une forte hostilité dans ses propos.
TEST: elle a {{dessellé}} deux problèmes.
@@@@
| > | 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 |
<<- /conf/ -1>> =\1.replace("escell", "écel").replace("essell", "écel")
# Confusion probable si ce mot se rapporte à “\3 \4”. Desceller signifie briser un sceau, un cachet… Desseller signifie ôter une selle.|http://fr.wiktionary.org/wiki/déceler
[>desseller|>desceller] [l’|@:[BD]] [>acidité|>activité|>allergie|>anévrisme|>anomalie|>arnaque|>appendicite|>atrophie|>baisse|>bébé|>blessure|>bug|>bogue|>carie|>cancer|>cause|>changement|>complot|>comète|>concentration|>corrélation|>croissance|>défaut|>défaillance|>demande|>dépression|>diabète|>différence|>diminution|>effluve|>épilepsie|>erreur|>essai|>existence|>grossesse|>grosseur|>faille|>faute|>fuite|>fraude|>grippe|>handicap|>hausse|>hémorragie|>hostilité|>hypertrophie|>incompatibilité|>incohérence|>infection|>infraction|>indice|>infidélité|>insuffisance|>intrigue|>irrégularité|>leucémie|>lésion|>lueur|>lumière|>maladie|>malformation|>manœuvre|>manipulation|>molécule|>mensonge|>mutation|>once|>perturbation|>personnalité|>piste|>perte|>planète|>exoplanète|>présence|>qualité|>odeur|>opportunité|>otite|>problème|>surdité|>talent|>tendance|>tentative|>tumeur|>utilisation|hoax|>variation|>vie|virus]
<<- /conf/ -1>> =\1.replace("escell", "écel").replace("essell", "écel")
# Confusion probable si ce mot se rapporte à “\3”. Desceller signifie briser un sceau, un cachet… Desseller signifie ôter une selle.|http://fr.wiktionary.org/wiki/déceler
TEST: il faut régler les problèmes {{descellés}} dans le code de l’analyseur syntaxique.
TEST: il y a une erreur qu’on peut {{desceller}} dans ses analyses.
TEST: elle a {{dessellé}} une forte hostilité dans ses propos.
TEST: elle a {{dessellé}} deux problèmes.
@@@@
|
| ︙ | ︙ |