Overview
Comment: | [build][core][fr] tags for sentence |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fr | core | build | rg |
Files: | files | file ages | folders |
SHA3-256: | 22a9b5a923787cdcf4bf410058b538c3 |
User & Date: | olr on 2018-06-22 11:11:33 |
Other Links: | branch diff | manifest | tags |
Context
2018-06-22
| ||
12:23 | [build] check casing within rules condition check-in: 0d097b9fcd user: olr tags: build, rg | |
11:11 | [build][core][fr] tags for sentence check-in: 22a9b5a923 user: olr tags: fr, core, build, rg | |
08:40 | [build] code clarification for graph rules reader check-in: 34a15fd0fb user: olr tags: build, rg | |
Changes
Modified compile_rules_graph.py from [06e4c2c6a2] to [e586a73a20].
︙ | ︙ | |||
10 11 12 13 14 15 16 17 | dACTIONS = {} dFUNCTIONS = {} def prepareFunction (s, bTokenValue=False): s = s.replace("__also__", "bCondMemo") s = s.replace("__else__", "not bCondMemo") s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) | > | | 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | dACTIONS = {} dFUNCTIONS = {} def prepareFunction (s, bTokenValue=False): s = s.replace("__also__", "bCondMemo") s = s.replace("__else__", "not bCondMemo") s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s) s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s) s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s) # next token s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s) # previous token s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s) # before(s) s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s) # after(s) s = re.sub(r"textarea\(\s*", 'look(s, ', s) # textarea(s) s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s) # before_chk1(s) |
︙ | ︙ | |||
143 144 145 146 147 148 149 | # Option sOption = False m = re.match("/(\\w+)/", sAction) if m: sOption = m.group(1) sAction = sAction[m.end():].strip() # valid action? | | | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | # Option sOption = False m = re.match("/(\\w+)/", sAction) if m: sOption = m.group(1) sAction = sAction[m.end():].strip() # valid action? m = re.search("(?P<action>[-~=/])(?P<start>\\d+|)(?P<end>:\\d+|)>> ", sAction) if not m: print(" # Error. No action found at: ", sActionId) print(" ==", sAction, "==") return None # Condition sCondition = sAction[:m.start()].strip() if sCondition: |
︙ | ︙ | |||
228 229 230 231 232 233 234 235 236 237 238 239 240 241 | elif cAction == "~": ## text processor if sAction[0:1] == "=": dFUNCTIONS["g_p_"+sActionId] = sAction[1:] sAction = "=g_p_"+sActionId elif sAction.startswith('"') and sAction.endswith('"'): sAction = sAction[1:-1] return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction] elif cAction == "=": ## disambiguator if sAction[0:1] == "=": sAction = sAction[1:] if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction): print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings") | > > > | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 | elif cAction == "~": ## text processor if sAction[0:1] == "=": dFUNCTIONS["g_p_"+sActionId] = sAction[1:] sAction = "=g_p_"+sActionId elif sAction.startswith('"') and sAction.endswith('"'): sAction = sAction[1:-1] return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction] elif cAction == "/": ## tags return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction] elif cAction == "=": ## disambiguator if sAction[0:1] == "=": sAction = sAction[1:] if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction): print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings") |
︙ | ︙ | |||
333 334 335 336 337 338 339 | # creating file with all functions callable by rules print(" creating callables...") sPyCallables = "# generated code, do not edit\n" #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n" for sFuncName, sReturn in dFUNCTIONS.items(): if sFuncName.startswith("g_c_"): # condition | | | 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 | # creating file with all functions callable by rules print(" creating callables...") sPyCallables = "# generated code, do not edit\n" #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n" for sFuncName, sReturn in dFUNCTIONS.items(): if sFuncName.startswith("g_c_"): # condition sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags" elif sFuncName.startswith("g_m_"): # message sParams = "lToken, nTokenOffset" elif sFuncName.startswith("g_s_"): # suggestion sParams = "lToken, nTokenOffset" elif sFuncName.startswith("g_p_"): # preprocessor sParams = "lToken" elif sFuncName.startswith("g_d_"): # disambiguator |
︙ | ︙ |
Modified gc_core/py/lang_core/gc_engine.py from [d166a26ff3] to [3f95e0d88e].
︙ | ︙ | |||
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 | def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 self.nOffsetWithinParagraph = nOffset self.lToken = list(_oTokenizer.genTokens(sSentence, True)) self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken } self.createError = self._createWriterError if _bWriterError else self._createDictError def update (self, sSentence): self.sSentence = sSentence self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where <dToken> “values” match <dNode> arcs" | > > | 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 | def __init__ (self, sSentence, sSentence0, nOffset): self.sSentence = sSentence self.sSentence0 = sSentence0 self.nOffsetWithinParagraph = nOffset self.lToken = list(_oTokenizer.genTokens(sSentence, True)) self.dTokenPos = { dToken["nStart"]: dToken for dToken in self.lToken } self.dTags = {} self.createError = self._createWriterError if _bWriterError else self._createDictError def update (self, sSentence): self.sSentence = sSentence self.lToken = list(_oTokenizer.genTokens(sSentence, True)) def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False): "generator: return nodes where <dToken> “values” match <dNode> arcs" |
︙ | ︙ | |||
702 703 704 705 706 707 708 | bCondMemo = None for sRuleId in dGraph[nextNodeKey]: try: if bDebug: print("ACTION:", sRuleId) print(dRule[sRuleId]) sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId] | > > | > > | | 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 | bCondMemo = None for sRuleId in dGraph[nextNodeKey]: try: if bDebug: print("ACTION:", sRuleId) print(dRule[sRuleId]) sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId] # Suggestion [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ] # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ] # Disambiguator [ option, condition, "=", replacement/suggestion/action ] # Tag [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ] # Test [ option, condition, ">", "" ] if not sOption or dOptions.get(sOption, False): bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags) if bCondMemo: if cActionType == "-": # grammar error nTokenErrorStart = nTokenOffset + eAct[0] nTokenErrorEnd = (nTokenOffset + eAct[1]) if eAct[1] else nLastToken nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"] nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"] |
︙ | ︙ | |||
734 735 736 737 738 739 740 741 742 743 744 745 746 747 | if bDebug: print("=", sRuleId) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: print(">", sRuleId) pass else: print("# error: unknown action at " + sLineId) elif cActionType == ">": if bDebug: print(">!", sRuleId) break except Exception as e: | > > > > > > > > > | 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 | if bDebug: print("=", sRuleId) elif cActionType == ">": # we do nothing, this test is just a condition to apply all following actions if bDebug: print(">", sRuleId) pass elif cActionType == "/": # tags nTokenTag = nTokenOffset + eAct[0] if sWhat not in self.dTags: self.dTags[sWhat] = (nTokenTag, nTokenTag) elif nTokenTag > self.dTags[sWhat][1]: self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag) if bDebug: print("/", sRuleId) else: print("# error: unknown action at " + sLineId) elif cActionType == ">": if bDebug: print(">!", sRuleId) break except Exception as e: |
︙ | ︙ | |||
932 933 934 935 936 937 938 939 940 941 942 943 944 945 | zNegPattern = re.compile(sNegPattern) if any(zNegPattern.search(sMorph) for sMorph in lMorph): return False # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(sMorph) for sMorph in lMorph) #### Disambiguator def g_select (dToken, sPattern, lDefault=None): "select morphologies for <dToken> according to <sPattern>, always return True" lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) | > > > > > > > > > > > > > > > | 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 | zNegPattern = re.compile(sNegPattern) if any(zNegPattern.search(sMorph) for sMorph in lMorph): return False # search sPattern zPattern = re.compile(sPattern) return any(zPattern.search(sMorph) for sMorph in lMorph) def g_tag_before (dToken, sTag, dTags): if sTag not in dTags: return False if dToken["nStart"] > dTags[sTag][0]: return True return False def g_tag_after (dToken, sTag, dTags): if sTag not in dTags: return False if dToken["nStart"] < dTags[sTag][1]: return True return False #### Disambiguator def g_select (dToken, sPattern, lDefault=None): "select morphologies for <dToken> according to <sPattern>, always return True" lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) |
︙ | ︙ |
Modified gc_lang/fr/rules.grx from [25bf3d2ac0] to [130969b0ab].
︙ | ︙ | |||
4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 | @@@@ @@@@ @@@@GRAPH: graphe1 @@@@ @@@@ @@@@ @@@@ !! !! !!!! Locutions invariables !! !! | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 | @@@@ @@@@ @@@@GRAPH: graphe1 @@@@ @@@@ @@@@ @@@@ __tag_sujets__ [je|j’] moi qui moi [seul|seule] <<- />> 1s tu toi ?,¿ qui toi [seul|seule] <<- />> 2s nous nous ?,¿ qui nous-même nous-mêmes nous [seul|seuls|seules] et moi ni moi moi et <<- />> 1p vous vous ?,¿ qui vous-même vous-mêmes vous [seul|seule|seuls|seules] et toi ni toi toi et <<- />> 2p !! !! !!!! Locutions invariables !! !! |
︙ | ︙ |