ADDED gc_core/py/lang_core/gc_engine_func.py Index: gc_core/py/lang_core/gc_engine_func.py ================================================================== --- gc_core/py/lang_core/gc_engine_func.py +++ gc_core/py/lang_core/gc_engine_func.py @@ -0,0 +1,425 @@ +""" +Grammar checking functions +""" + +# generated code, do not edit +# source: gc_core/py/lang_core/gc_engine_func.py + +import re + +from . import gc_options + +_sAppContext = "Python" # what software is running +_oSpellChecker = None + + +def load (sContext, oSpellChecker): + global _sAppContext + global _oSpellChecker + _sAppContext = sContext + _oSpellChecker = oSpellChecker + + +#### common functions + +def option (sOpt): + "return True if option is active" + return gc_options.dOptions.get(sOpt, False) + + +#### Functions to get text outside pattern scope + +# warning: check compile_rules.py to understand how it works + +_zNextWord = re.compile(r" +(\w[\w-]*)") +_zPrevWord = re.compile(r"(\w[\w-]*) +$") + +def nextword (s, iStart, n): + "get the nth word of the input string or empty string" + m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:]) + if not m: + return None + return (iStart+m.start(1), m.group(1)) + + +def prevword (s, iEnd, n): + "get the (-)nth word of the input string or empty string" + m = re.search("([\\w%-]+) +(?:[\\w%-]+ +){" + str(n-1) + "}$", s[:iEnd]) + if not m: + return None + return (m.start(1), m.group(1)) + + +def nextword1 (s, iStart): + "get next word (optimization)" + m = _zNextWord.match(s[iStart:]) + if not m: + return None + return (iStart+m.start(1), m.group(1)) + + +def prevword1 (s, iEnd): + "get previous word (optimization)" + m = _zPrevWord.search(s[:iEnd]) + if not m: + return None + return (m.start(1), m.group(1)) + + +def look (s, sPattern, sNegPattern=None): + "seek sPattern in s (before/after/fulltext), if sNegPattern not in s" + if sNegPattern and re.search(sNegPattern, s): + return False + if re.search(sPattern, s): + return True + return False + + +def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=""): + "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1" + m = re.search(sPattern, s) + if not m: + return False + try: + sWord = m.group(1) + nPos = m.start(1) + nOffset + except IndexError: + return False + return morph(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1) + + + +#### Analyse groups for regex rules + +def displayInfo (dTokenPos, tWord): + "for debugging: retrieve info of word" + if not tWord: + print("> nothing to find") + return True + lMorph = _oSpellChecker.getMorph(tWord[1]) + if not lMorph: + print("> not in dictionary") + return True + print("TOKENS:", dTokenPos) + if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]: + print("DA: " + str(dTokenPos[tWord[0]]["lMorph"])) + print("FSA: " + str(lMorph)) + return True + + +def morph (dTokenPos, tWord, sPattern, sNegPattern="", bNoWord=False): + "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" + if not tWord: + return bNoWord + lMorph = dTokenPos[tWord[0]]["lMorph"] if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]] else _oSpellChecker.getMorph(tWord[1]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def analyse (sWord, sPattern, sNegPattern=""): + "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +#### Analyse tokens for graph rules + +def g_value (dToken, sValues, nLeft=None, nRight=None): + "test if is in sValues (each value should be separated with |)" + sValue = "|"+dToken["sValue"]+"|" if nLeft is None else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|" + if sValue in sValues: + return True + if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout". + if sValue.lower() in sValues: + return True + elif dToken["sValue"].isupper(): + #if sValue.lower() in sValues: + # return True + sValue = "|"+sValue[1:].capitalize() + if sValue in sValues: + return True + sValue = sValue.lower() + if sValue in sValues: + return True + return False + + +def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): + "analyse a token, return True if not in morphologies and in morphologies" + if "lMorph" in dToken: + lMorph = dToken["lMorph"] + else: + if nLeft is not None: + lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + if bMemorizeMorph: + dToken["lMorph"] = lMorph + else: + lMorph = _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_analyse (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True): + "analyse a token, return True if not in morphologies and in morphologies (disambiguation off)" + if nLeft is not None: + lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + if bMemorizeMorph: + dToken["lMorph"] = lMorph + else: + lMorph = _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + return all(zPattern.search(sMorph) for sMorph in lMorph) + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + return any(zPattern.search(sMorph) for sMorph in lMorph) + + +def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True): + "merge two token values, return True if not in morphologies and in morphologies (disambiguation off)" + lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"]) + if not lMorph: + return False + # check negative condition + if sNegPattern: + if sNegPattern == "*": + # all morph must match sPattern + zPattern = re.compile(sPattern) + bResult = all(zPattern.search(sMorph) for sMorph in lMorph) + if bResult and bSetMorph: + dToken1["lMorph"] = lMorph + return bResult + zNegPattern = re.compile(sNegPattern) + if any(zNegPattern.search(sMorph) for sMorph in lMorph): + return False + # search sPattern + zPattern = re.compile(sPattern) + bResult = any(zPattern.search(sMorph) for sMorph in lMorph) + if bResult and bSetMorph: + dToken1["lMorph"] = lMorph + return bResult + + +def g_tag_before (dToken, dTags, sTag): + "returns True if is present on tokens before " + if sTag not in dTags: + return False + if dToken["i"] > dTags[sTag][0]: + return True + return False + + +def g_tag_after (dToken, dTags, sTag): + "returns True if is present on tokens after " + if sTag not in dTags: + return False + if dToken["i"] < dTags[sTag][1]: + return True + return False + + +def g_tag (dToken, sTag): + "returns True if is present on token " + return "aTags" in dToken and sTag in dToken["aTags"] + + +def g_meta (dToken, sType): + "returns True if is equal to the token type" + return dToken["sType"] == sType + + +def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None): + "checks if spaces between tokens is >= and <= " + nSpace = dToken2["nStart"] - dToken1["nEnd"] + if nSpace < nMin: + return False + if nMax is not None and nSpace > nMax: + return False + return True + + +def g_token (lToken, i): + "return token at index in lToken (or the closest one)" + if i < 0: + return lToken[0] + if i >= len(lToken): + return lToken[-1] + return lToken[i] + + + +#### Disambiguator for regex rules + +def select (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: select morphologies of matching " + if not sWord: + return True + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph or len(lMorph) == 1: + return True + lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dTokenPos[nPos]["lMorph"] = lSelect + elif lDefault: + dTokenPos[nPos]["lMorph"] = lDefault + return True + + +def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None): + "Disambiguation: exclude morphologies of matching " + if not sWord: + return True + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + lMorph = _oSpellChecker.getMorph(sWord) + if not lMorph or len(lMorph) == 1: + return True + lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dTokenPos[nPos]["lMorph"] = lSelect + elif lDefault: + dTokenPos[nPos]["lMorph"] = lDefault + return True + + +def define (dTokenPos, nPos, lMorph): + "Disambiguation: set morphologies of token at with " + if nPos not in dTokenPos: + print("Error. There should be a token at this position: ", nPos) + return True + dTokenPos[nPos]["lMorph"] = lMorph + return True + + +#### Disambiguation for graph rules + +def g_select (dToken, sPattern, lDefault=None): + "Disambiguation: select morphologies for according to , always return True" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph or len(lMorph) == 1: + if lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dToken["lMorph"] = lSelect + elif lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + + +def g_exclude (dToken, sPattern, lDefault=None): + "Disambiguation: select morphologies for according to , always return True" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + if not lMorph or len(lMorph) == 1: + if lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] + if lSelect: + if len(lSelect) != len(lMorph): + dToken["lMorph"] = lSelect + elif lDefault: + dToken["lMorph"] = lDefault + #print("DA:", dToken["sValue"], dToken["lMorph"]) + return True + + +def g_add_morph (dToken, lNewMorph): + "Disambiguation: add a morphology to a token" + lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"]) + lMorph.extend(lNewMorph) + dToken["lMorph"] = lMorph + return True + + +def g_define (dToken, lMorph): + "Disambiguation: set morphologies of , always return True" + dToken["lMorph"] = lMorph + #print("DA:", dToken["sValue"], lMorph) + return True + + +def g_define_from (dToken, nLeft=None, nRight=None): + "Disambiguation: set morphologies of with slicing its value with and " + if nLeft is not None: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)]) + else: + dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"]) + return True + + +def g_change_meta (dToken, sType): + "Disambiguation: change type of token" + dToken["sType"] = sType + return True + + + +#### GRAMMAR CHECKER PLUGINS + +${plugins} + + +#### CALLABLES FOR REGEX RULES (generated code) + +${callables} + + +#### CALLABLES FOR GRAPH RULES (generated code) + +${graph_callables}