Overview
| Comment: | [build][core][fr] tags for sentence |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | fr | core | build | rg |
| Files: | files | file ages | folders |
| SHA3-256: |
22a9b5a923787cdcf4bf410058b538c3 |
| User & Date: | olr on 2018-06-22 11:11:33 |
| Other Links: | branch diff | manifest | tags |
Context
|
2018-06-22
| ||
| 12:23 | [build] check casing within rules condition check-in: 0d097b9fcd user: olr tags: build, rg | |
| 11:11 | [build][core][fr] tags for sentence check-in: 22a9b5a923 user: olr tags: fr, core, build, rg | |
| 08:40 | [build] code clarification for graph rules reader check-in: 34a15fd0fb user: olr tags: build, rg | |
Changes
Modified compile_rules_graph.py from [06e4c2c6a2] to [e586a73a20].
| ︙ | ︙ | |||
10 11 12 13 14 15 16 17 |
dACTIONS = {}
dFUNCTIONS = {}
def prepareFunction (s, bTokenValue=False):
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
| > | | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
dACTIONS = {}
dFUNCTIONS = {}
def prepareFunction (s, bTokenValue=False):
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token
s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token
s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s) # before(s)
s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s) # after(s)
s = re.sub(r"textarea\(\s*", 'look(s, ', s) # textarea(s)
s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s) # before_chk1(s)
|
| ︙ | ︙ | |||
143 144 145 146 147 148 149 |
# Option
sOption = False
m = re.match("/(\\w+)/", sAction)
if m:
sOption = m.group(1)
sAction = sAction[m.end():].strip()
# valid action?
| | | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# Option
sOption = False
m = re.match("/(\\w+)/", sAction)
if m:
sOption = m.group(1)
sAction = sAction[m.end():].strip()
# valid action?
m = re.search("(?P<action>[-~=/])(?P<start>\\d+|)(?P<end>:\\d+|)>> ", sAction)
if not m:
print(" # Error. No action found at: ", sActionId)
print(" ==", sAction, "==")
return None
# Condition
sCondition = sAction[:m.start()].strip()
if sCondition:
|
| ︙ | ︙ | |||
228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
elif cAction == "~":
## text processor
if sAction[0:1] == "=":
dFUNCTIONS["g_p_"+sActionId] = sAction[1:]
sAction = "=g_p_"+sActionId
elif sAction.startswith('"') and sAction.endswith('"'):
sAction = sAction[1:-1]
return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
elif cAction == "=":
## disambiguator
if sAction[0:1] == "=":
sAction = sAction[1:]
if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings")
| > > > | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
elif cAction == "~":
## text processor
if sAction[0:1] == "=":
dFUNCTIONS["g_p_"+sActionId] = sAction[1:]
sAction = "=g_p_"+sActionId
elif sAction.startswith('"') and sAction.endswith('"'):
sAction = sAction[1:-1]
return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
elif cAction == "/":
## tags
return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
elif cAction == "=":
## disambiguator
if sAction[0:1] == "=":
sAction = sAction[1:]
if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings")
|
| ︙ | ︙ | |||
333 334 335 336 337 338 339 |
# creating file with all functions callable by rules
print(" creating callables...")
sPyCallables = "# generated code, do not edit\n"
#sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
for sFuncName, sReturn in dFUNCTIONS.items():
if sFuncName.startswith("g_c_"): # condition
| | | 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 |
# creating file with all functions callable by rules
print(" creating callables...")
sPyCallables = "# generated code, do not edit\n"
#sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
for sFuncName, sReturn in dFUNCTIONS.items():
if sFuncName.startswith("g_c_"): # condition
sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags"
elif sFuncName.startswith("g_m_"): # message
sParams = "lToken, nTokenOffset"
elif sFuncName.startswith("g_s_"): # suggestion
sParams = "lToken, nTokenOffset"
elif sFuncName.startswith("g_p_"): # preprocessor
sParams = "lToken"
elif sFuncName.startswith("g_d_"): # disambiguator
|
| ︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [d166a26ff3] to [3f95e0d88e].
| ︙ | ︙ | |||
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 |
def __init__ (self, sSentence, sSentence0, nOffset):
self.sSentence = sSentence
self.sSentence0 = sSentence0
self.nOffsetWithinParagraph = nOffset
self.lToken = list(_oTokenizer.genTokens(sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
self.createError = self._createWriterError if _bWriterError else self._createDictError
def update (self, sSentence):
self.sSentence = sSentence
self.lToken = list(_oTokenizer.genTokens(sSentence, True))
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
| > > | 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 |
def __init__ (self, sSentence, sSentence0, nOffset):
self.sSentence = sSentence
self.sSentence0 = sSentence0
self.nOffsetWithinParagraph = nOffset
self.lToken = list(_oTokenizer.genTokens(sSentence, True))
self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken }
self.dTags = {}
self.createError = self._createWriterError if _bWriterError else self._createDictError
def update (self, sSentence):
self.sSentence = sSentence
self.lToken = list(_oTokenizer.genTokens(sSentence, True))
def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
"generator: return nodes where <dToken> “values” match <dNode> arcs"
|
| ︙ | ︙ | |||
702 703 704 705 706 707 708 |
bCondMemo = None
for sRuleId in dGraph[nextNodeKey]:
try:
if bDebug:
print("ACTION:", sRuleId)
print(dRule[sRuleId])
sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
| > > | > > | | 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 |
bCondMemo = None
for sRuleId in dGraph[nextNodeKey]:
try:
if bDebug:
print("ACTION:", sRuleId)
print(dRule[sRuleId])
sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
# Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ]
# TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ]
# Disambiguator [ option, condition, "=", replacement/suggestion/action ]
# Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
# Test [ option, condition, ">", "" ]
if not sOption or dOptions.get(sOption, False):
bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags)
if bCondMemo:
if cActionType == "-":
# grammar error
nTokenErrorStart = nTokenOffset + eAct[0]
nTokenErrorEnd = (nTokenOffset + eAct[1]) if eAct[1] else nLastToken
nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
|
| ︙ | ︙ | |||
734 735 736 737 738 739 740 741 742 743 744 745 746 747 |
if bDebug:
print("=", sRuleId)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
if bDebug:
print(">", sRuleId)
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
if bDebug:
print(">!", sRuleId)
break
except Exception as e:
| > > > > > > > > > | 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 |
if bDebug:
print("=", sRuleId)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
if bDebug:
print(">", sRuleId)
pass
elif cActionType == "/":
# tags
nTokenTag = nTokenOffset + eAct[0]
if sWhat not in self.dTags:
self.dTags[sWhat] = (nTokenTag, nTokenTag)
elif nTokenTag > self.dTags[sWhat][1]:
self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
if bDebug:
print("/", sRuleId)
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
if bDebug:
print(">!", sRuleId)
break
except Exception as e:
|
| ︙ | ︙ | |||
932 933 934 935 936 937 938 939 940 941 942 943 944 945 |
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
| > > > > > > > > > > > > > > > | 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 |
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_tag_before (dToken, sTag, dTags):
if sTag not in dTags:
return False
if dToken["nStart"] > dTags[sTag][0]:
return True
return False
def g_tag_after (dToken, sTag, dTags):
if sTag not in dTags:
return False
if dToken["nStart"] < dTags[sTag][1]:
return True
return False
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
|
| ︙ | ︙ |
Modified gc_lang/fr/rules.grx from [25bf3d2ac0] to [130969b0ab].
| ︙ | ︙ | |||
4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 | @@@@ @@@@ @@@@GRAPH: graphe1 @@@@ @@@@ @@@@ @@@@ !! !! !!!! Locutions invariables !! !! | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 |
@@@@
@@@@
@@@@GRAPH: graphe1
@@@@
@@@@
@@@@
@@@@
__tag_sujets__
[je|j’]
moi qui
moi [seul|seule]
<<- />> 1s
tu
toi ?,¿ qui
toi [seul|seule]
<<- />> 2s
nous
nous ?,¿ qui
nous-même
nous-mêmes
nous [seul|seuls|seules]
et moi
ni moi
moi et
<<- />> 1p
vous
vous ?,¿ qui
vous-même
vous-mêmes
vous [seul|seule|seuls|seules]
et toi
ni toi
toi et
<<- />> 2p
!!
!!
!!!! Locutions invariables
!!
!!
|
| ︙ | ︙ |