Index: compile_rules.py
==================================================================
--- compile_rules.py
+++ compile_rules.py
@@ -28,35 +28,35 @@
     s = re.sub(r"isRealStart0 *\(\)", 'before0("^ *$")', s)
     s = re.sub(r"isEnd *\(\)", 'after("^ *$|^,")', s)
     s = re.sub(r"isRealEnd *\(\)", 'after("^ *$")', s)
     s = re.sub(r"isEnd0 *\(\)", 'after0("^ *$|^,")', s)
     s = re.sub(r"isRealEnd0 *\(\)", 'after0("^ *$")', s)
-    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(dDA, m.start(\\2), m.group(\\2)', s)
-    s = re.sub(r"define[(][\\](\d+)", 'define(dDA, m.start(\\1)', s)
+    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(dTokenPos, m.start(\\2), m.group(\\2)', s)
+    s = re.sub(r"define[(][\\](\d+)", 'define(dTokenPos, m.start(\\1)', s)
     s = re.sub(r"(morph|morphex|displayInfo)[(][\\](\d+)", '\\1((m.start(\\2), m.group(\\2))', s)
-    s = re.sub(r"(morph|morphex|displayInfo)[(]", '\\1(dDA, ', s)
+    s = re.sub(r"(morph|morphex|displayInfo)[(]", '\\1(dTokenPos, ', s)
     s = re.sub(r"(sugg\w+|switch\w+)\(@", '\\1(m.group(i[4])', s)
     s = re.sub(r"word\(\s*1\b", 'nextword1(s, m.end()', s)                                  # word(1)
     s = re.sub(r"word\(\s*-1\b", 'prevword1(s, m.start()', s)                               # word(-1)
     s = re.sub(r"word\(\s*(\d)", 'nextword(s, m.end(), \\1', s)                             # word(n)
     s = re.sub(r"word\(\s*-(\d)", 'prevword(s, m.start(), \\1', s)                          # word(-n)
     s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
     s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
     s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
-    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
-    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
-    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                           # textarea_chk1(s)
+    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dTokenPos, s[:m.start()], 0, ', s)           # before_chk1(s)
+    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dTokenPos, s[m.end():], m.end(), ', s)        # after_chk1(s)
+    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dTokenPos, s, 0, ', s)                     # textarea_chk1(s)
     s = re.sub(r"/0", 'sx[m.start():m.end()]', s)                                           # /0
     s = re.sub(r"before0\(\s*", 'look(sx[:m.start()], ', s)                                 # before0(s)
     s = re.sub(r"after0\(\s*", 'look(sx[m.end():], ', s)                                    # after0(s)
     s = re.sub(r"textarea0\(\s*", 'look(sx, ', s)                                           # textarea0(s)
-    s = re.sub(r"before0_chk1\(\s*", 'look_chk1(dDA, sx[:m.start()], 0, ', s)               # before0_chk1(s)
-    s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dDA, sx[m.end():], m.end(), ', s)            # after0_chk1(s)
-    s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dDA, sx, 0, ', s)                         # textarea0_chk1(s)
-    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
-    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
-    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
+    s = re.sub(r"before0_chk1\(\s*", 'look_chk1(dTokenPos, sx[:m.start()], 0, ', s)         # before0_chk1(s)
+    s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dTokenPos, sx[m.end():], m.end(), ', s)      # after0_chk1(s)
+    s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dTokenPos, sx, 0, ', s)                   # textarea0_chk1(s)
+    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dTokenPos, s[m.end():], m.end())', s)        # isEndOfNG(s)
+    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dTokenPos, s[m.end():], m.end())', s)  # isNextNotCOD(s)
+    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dTokenPos, s[m.end():], m.end())', s)      # isNextVerb(s)
     s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
     s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
     return s
@@ -549,19 +549,19 @@
     sPyCallables = "# generated code, do not edit\n"
     sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
     for sFuncName, sReturn in lFUNCTIONS:
         cType = sFuncName[0:1]
         if cType == "c": # condition
-            sParams = "s, sx, m, dDA, sCountry, bCondMemo"
+            sParams = "s, sx, m, dTokenPos, sCountry, bCondMemo"
         elif cType == "m": # message
             sParams = "s, m"
         elif cType == "s": # suggestion
             sParams = "s, m"
         elif cType == "p": # preprocessor
             sParams = "s, m"
         elif cType == "d": # disambiguator
-            sParams = "s, m, dDA"
+            sParams = "s, m, dTokenPos"
         else:
             print("# Unknown function type in [" + sFuncName + "]")
             continue
         sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
         sPyCallables += "    return " + sReturn + "\n"
Index: gc_core/py/lang_core/gc_engine.py
==================================================================
--- gc_core/py/lang_core/gc_engine.py
+++ gc_core/py/lang_core/gc_engine.py
@@ -106,18 +106,17 @@
 def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
     "analyses the paragraph sText and returns list of errors"
     #sText = unicodedata.normalize("NFC", sText)
     aErrors = None
     sRealText = sText
-    dDA = {}        # Disambiguisator. Key = position; value = list of morphologies
     dPriority = {}  # Key = position; value = priority
     dOpt = _dOptions if not dOptions else dOptions
     bShowRuleId = option('idrule')
     # parse paragraph
     try:
-        sNew, aErrors = _proofread(None, sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
+        sNew, aErrors = _proofread(None, sText, sRealText, 0, True, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
         if sNew:
             sText = sNew
     except:
         raise
@@ -132,14 +131,13 @@
     sText = sText.replace("‑", "-")  # nobreakdash
     # parse sentences
     for iStart, iEnd in _getSentenceBoundaries(sText):
         if 4 < (iEnd - iStart) < 2000:
-            dDA.clear()
             try:
                 oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
-                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
+                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
                 aErrors.update(errs)
             except:
                 raise
     return aErrors.values() # this is a view (iterable)
@@ -153,14 +151,15 @@
     for m in _zEndOfSentence.finditer(sText):
         yield (iStart, m.end())
         iStart = m.end()


-def _proofread (oSentence, s, sx, nOffset, bParagraph, dDA, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
+def _proofread (oSentence, s, sx, nOffset, bParagraph, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
     dErrs = {}
     bParagraphChange = False
     bSentenceChange = False
+    dTokenPos = oSentence.dTokenPos if oSentence else {}
     for sOption, lRuleGroup in _getRules(bParagraph):
         if sOption == "@@@@":
             # graph rules
             if not bParagraph and bSentenceChange:
                 oSentence.update(s)
@@ -181,11 +180,11 @@
             for m in zRegex.finditer(s):
                 bCondMemo = None
                 for sFuncCond, cActionType, sWhat, *eAct in lActions:
                     # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                     try:
-                        bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)
+                        bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dTokenPos, sCountry, bCondMemo)
                         if bCondMemo:
                             if cActionType == "-":
                                 # grammar error
                                 nErrorStart = nOffset + m.start(eAct[0])
                                 if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
@@ -198,13 +197,14 @@
                                     bSentenceChange = True
                                 if bDebug:
                                     echo("~ " + s + " -- " + m.group(eAct[0]) + " # " + sLineId)
                             elif cActionType == "=":
                                 # disambiguation
-                                globals()[sWhat](s, m, dDA)
-                                if bDebug:
-                                    echo("= " + m.group(0) + " # " + sLineId + "\nDA: " + str(dDA))
+                                if not bParagraph:
+                                    globals()[sWhat](s, m, dTokenPos)
+                                    if bDebug:
+                                        echo("= " + m.group(0) + " # " + sLineId)
                             elif cActionType == ">":
                                 # we do nothing, this test is just a condition to apply all following actions
                                 pass
                             else:
                                 echo("# error: unknown action at " + sLineId)
@@ -397,43 +397,43 @@
 def option (sOpt):
     "return True if option sOpt is active"
     return _dOptions.get(sOpt, False)


-def displayInfo (dDA, tWord):
+def displayInfo (dTokenPos, tWord):
     "for debugging: retrieve info of word"
     if not tWord:
         echo("> nothing to find")
         return True
     lMorph = _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         echo("> not in dictionary")
         return True
-    if tWord[0] in dDA:
-        echo("DA: " + str(dDA[tWord[0]]))
+    if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]:
+        echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"]))
     echo("FSA: " + str(lMorph))
     return True


-def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
+def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False):
     "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
     if not tWord:
         return bNoWord
-    lMorph = dDA[tWord[0]] if tWord[0] in dDA else _oSpellChecker.getMorph(tWord[1])
+    lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         return False
     p = re.compile(sPattern)
     if bStrict:
         return all(p.search(s) for s in lMorph)
     return any(p.search(s) for s in lMorph)


-def morphex (dDA, tWord, sPattern, sNegPattern, bNoWord=False):
+def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False):
     "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
     if not tWord:
         return bNoWord
-    lMorph = dDA[tWord[0]] if tWord[0] in dDA else _oSpellChecker.getMorph(tWord[1])
+    lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1])
     if not lMorph:
         return False
     # check negative condition
     np = re.compile(sNegPattern)
     if any(np.search(s) for s in lMorph):
@@ -515,11 +515,11 @@
         if re.search(sPattern, s):
             return True
     return False


-def look_chk1 (dDA, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=None):
+def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=None):
     "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1"
     m = re.search(sPattern, s)
     if not m:
         return False
     try:
@@ -526,52 +526,57 @@
         sWord = m.group(1)
         nPos = m.start(1) + nOffset
     except:
         return False
     if sNegPatternGroup1:
-        return morphex(dDA, (nPos, sWord), sPatternGroup1, sNegPatternGroup1)
-    return morph(dDA, (nPos, sWord), sPatternGroup1, False)
+        return morphex(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1)
+    return morph(dTokenPos, (nPos, sWord), sPatternGroup1, False)


 #### Disambiguator

-def select (dDA, nPos, sWord, sPattern, lDefault=None):
+def select (dTokenPos, nPos, sWord, sPattern, lDefault=None):
     if not sWord:
         return True
-    if nPos in dDA:
+    if nPos not in dTokenPos:
+        print("Error. There should be a token at this position: ", nPos)
         return True
     lMorph = _oSpellChecker.getMorph(sWord)
     if not lMorph or len(lMorph) == 1:
         return True
     lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
     if lSelect:
         if len(lSelect) != len(lMorph):
-            dDA[nPos] = lSelect
+            dTokenPos[nPos]["lMorph"] = lSelect
     elif lDefault:
-        dDA[nPos] = lDefault
+        dTokenPos[nPos]["lMorph"] = lDefault
     return True


-def exclude (dDA, nPos, sWord, sPattern, lDefault=None):
+def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None):
     if not sWord:
         return True
-    if nPos in dDA:
+    if nPos not in dTokenPos:
+        print("Error. There should be a token at this position: ", nPos)
         return True
     lMorph = _oSpellChecker.getMorph(sWord)
     if not lMorph or len(lMorph) == 1:
         return True
     lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
     if lSelect:
         if len(lSelect) != len(lMorph):
-            dDA[nPos] = lSelect
+            dTokenPos[nPos]["lMorph"] = lSelect
     elif lDefault:
-        dDA[nPos] = lDefault
+        dTokenPos[nPos]["lMorph"] = lDefault
     return True


-def define (dDA, nPos, lMorph):
-    dDA[nPos] = lMorph
+def define (dTokenPos, nPos, lMorph):
+    if nPos not in dTokenPos:
+        print("Error. There should be a token at this position: ", nPos)
+        return True
+    dTokenPos[nPos]["lMorph"] = lMorph
     return True


 #### GRAMMAR CHECKER PLUGINS
Index: gc_lang/fr/rules.grx
==================================================================
--- gc_lang/fr/rules.grx
+++ gc_lang/fr/rules.grx
@@ -390,11 +390,10 @@
     https?://[\w./?&!%=+*"'@$#-]+
         <<- ~>> *
 __> *
         <<- ~2>> =\2.capitalize()
-        <<- =>> define(\2, [":MP:e:i"])
         <<- ~3>> *

 # Numéro de chapitre
 __(p_chapitre)__
     ^\d+[.][\d.-]*
         <<- ~>> *
@@ -12398,12 +12397,14 @@
 # verbe que + subjonctif
 __vmode_qqch_que_subjonctif1__
     [>afin|>avant|>pour|>quoi|>permettre|>falloir|>vouloir|>ordonner|>exiger|>désirer|>préférer|>suffire] [que|qu’|qu] @:(?:Os|M) @:I¬:[GYS]
-    >douter [que|qu’|qu] @:(?:Os|M) @:I¬:(?:[GYSK]|If)
-        <<- -4>> =suggVerbMode(\4, ":S", \3)                                        # Après « \1 que », ce verbe devrait être au subjonctif.
+        <<- /vmode/ -4>> =suggVerbMode(\4, ":S", \3)                                # Après « \1 que », ce verbe devrait être au subjonctif.
+
+    >douter [que|qu’|qu] @:(?:Os|M) @:I¬:(?:[GYSK]|If)
+        <<- /vmode/ morph(\1, ":V", ":N") -4>> =suggVerbMode(\4, ":S", \3)          # Après « \1 que », ce verbe devrait être au subjonctif.

 TEST: Il suffit qu’il {{court}} plus
 TEST: Je veux qu’il {{finit}} son repas.
 TEST: quoi qu’il en {{conclut}}
 TEST: Je ne veux pas que tu {{es}} des ennuis
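A note on the data-structure change, separate from the patch itself: the old disambiguation store dDA mapped a text position to a bare list of morphologies, while dTokenPos maps a position to a token dict and keeps the retained analyses under its "lMorph" key. Below is a minimal, self-contained sketch of the select() logic under the new layout; the toy lexicon, the morphology tags and the token-dict keys are invented for illustration, and getMorph() merely stands in for _oSpellChecker.getMorph().

import re

# Hypothetical lookup standing in for _oSpellChecker.getMorph(); tags are made up.
_dToyLexicon = {
    "court": [">courir/:V1:Iq:3s", ">court/:A:e:s", ">court/:N:m:s"],
}

def getMorph (sWord):
    return _dToyLexicon.get(sWord, [])

def select (dTokenPos, nPos, sWord, sPattern, lDefault=None):
    "keep only the analyses of the token at nPos whose morphology matches sPattern"
    if not sWord:
        return True
    if nPos not in dTokenPos:
        print("Error. There should be a token at this position: ", nPos)
        return True
    lMorph = getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        return True
    lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dTokenPos[nPos]["lMorph"] = lSelect     # new layout: analyses live inside the token dict
    elif lDefault:
        dTokenPos[nPos]["lMorph"] = lDefault
    return True

# Old layout: dDA = { 11: [">court/:A:e:s", ...] }   (position -> list of morphologies)
# New layout: position -> token dict produced by the tokenizer (keys here are illustrative).
dTokenPos = { 11: { "nStart": 11, "sValue": "court" } }
select(dTokenPos, 11, "court", ":V")
print(dTokenPos[11]["lMorph"])   # ['>courir/:V1:Iq:3s'] -> only the verbal reading is kept

The sketch mirrors the patched select(): when no token exists at the given position it only reports the inconsistency and returns, which matches the new behaviour of select(), exclude() and define() in gc_engine.py.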