# Grammalecte  Artifact [26cba906c4]
#
# Artifact 26cba906c4bc432883964842c917e0eeadb196271fc2b8532f6f40f987dc84a6:

#!/usr/bin/env python3

"""
Grammalecte CLI (command line interface)
"""

import sys
import os.path
import argparse
import json
import itertools
import re
import traceback

import grammalecte
import grammalecte.text as txt
import grammalecte.graphspell.str_transform as strt
from grammalecte.graphspell.echo import echo

# Sample French text riddled with deliberate mistakes; handy for a quick demonstration of the checker.
_EXAMPLE = "".join((
    "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. ",
    "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. ",
    "Quant sera t’il châtiés pour ses mensonge ?             Merde ! J’en aie marre.",
))

# Help text of the interactive console (one line per command understood by the pseudo-console).
_HELP = """
Analysis commands:
    any_text                            grammar checking
    ?word1 [word2] ...                  words analysis
    !word                               spelling suggestion
    >word                               draw path of word in the word graph
    =[filter1][=[filter2]]              show entries which fit to filters (filter1 for word, filter2 for morphology)
    ≠word|word|…                        show distance between words
    $some_text                          show sentences and tokens of text

Other commands:
    /help                       /h      show this text
    /lopt                       /lo     list options
    /lrules [pattern]           /lr     list rules
    /o+ option1 [option2] ...           activate grammar checking options
    /o- option1 [option2] ...           deactivate grammar checking options
    /r+ rule1 [rule2] ...               reactivate grammar checking rule
    /r- rule1 [rule2] ...               deactivate grammar checking rule
    /textformatter              /tf     switch on/off the text formatter
    /debug                      /d      switch on/off the debug mode
    /quit                       /q      exit
"""

def _getText (sInputText):
    "prompt with <sInputText> and return what the user typed (the built-in example text if the answer is «*»)"
    sAnswer = input(sInputText)
    if sAnswer == "*":
        return _EXAMPLE
    if sys.platform != "win32":
        return sAnswer
    # The Windows console apparently hands back «'» where «’» was typed;
    # restoring the typographic apostrophe avoids many useless warnings.
    return sAnswer.replace("'", "’")

def readFile (spf):
    """generator: returns file line by line

    <spf> is the path of a UTF-8 text file. Lines are yielded as read (line ending included).
    If <spf> is not an existing file, an error message is printed and nothing is yielded.
    """
    if not os.path.isfile(spf):
        # the message belongs to the "missing file" case only, never after a successful read
        print("# Error: file <" + spf + "> not found.")
        return
    with open(spf, "r", encoding="utf-8") as hSrc:
        yield from hSrc

def generateParagraphFromFile (spf, bConcatLines=False):
    """generator: returns text by tuple of (iParagraph, sParagraph, lLineSet)

    <spf> is the path of the file to read (via readFile).
    If <bConcatLines> is False, each line of the file is a paragraph and lLineSet is None.
    If <bConcatLines> is True, consecutive non-blank lines are merged into one paragraph with
    txt.createParagraphWithLines(); a blank line closes the paragraph in progress.
    """
    if not bConcatLines:
        # one paragraph per line
        for iParagraph, sLine in enumerate(readFile(spf), 1):
            yield iParagraph, sLine, None
    else:
        # paragraphs are runs of non-blank lines
        lLine = []
        iParagraph = 1
        for iLine, sLine in enumerate(readFile(spf), 1):
            if sLine.strip():
                lLine.append((iLine, sLine))
            elif lLine:
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                yield iParagraph, sText, lLineSet
                lLine = []
            # the counter advances on every line read, blank or not (kept as found)
            iParagraph += 1
        if lLine:
            # the file does not end with a blank line: flush the last paragraph
            sText, lLineSet = txt.createParagraphWithLines(lLine)
            yield iParagraph, sText, lLineSet

def output (sText, hDst=None):
    """write in the console or in a file if <hDst> not null

    <sText> is written as is (no newline added).
    <hDst> is an object with a write() method (an open text file), or None for the console.
    """
    if hDst:
        hDst.write(sText)
    else:
        echo(sText, end="")

def loadDictionary (spf):
    """returns the dictionary as a dictionary object

    <spf> is the path of a UTF-8 JSON file.
    Returns the decoded JSON content, or None (after printing an error message)
    if the file does not exist or is not valid JSON.
    """
    if not os.path.isfile(spf):
        print("# Error: file <" + spf + "> not found.")
        return None
    # the context manager closes the file even if reading fails
    with open(spf, "r", encoding="utf-8") as hSrc:
        sJSON = hSrc.read()
    try:
        return json.loads(sJSON)
    except json.JSONDecodeError:
        print("# Error. File <" + spf + "> is not a valid JSON file.")
        return None

def getCommand ():
    """ask the user for a command until a valid one is entered, and return it

    Returns:
        "q" or "n" (quit / next paragraph; upper-case input is accepted and lower-cased,
        as callers compare against the lower-case letters), or
        a tuple (nError, cAction, vSugg) where
            nError  is the 0-based index of the error (the user types a 1-based number),
            cAction is ">" (apply a suggestion) or "=" (replace with given text),
            vSugg   is the 0-based suggestion index (int) for ">",
                    or the replacement text (str) for "=".
    A bare error number, or a number followed by ">" alone, selects the first suggestion.
    """
    while True:
        print("COMMANDS: [N]ext paragraph  [Q]uit.")
        print("          [Error number]>[suggestion number]")
        print("          [Error number] (apply first suggestion)")
        print("          [Error number]=[replacement]")
        sCommand = input("        ? ")
        if sCommand in ("q", "Q", "n", "N"):
            return sCommand.lower()
        m = re.match("^([0-9]+)(>[0-9]*|=.*|)$", sCommand)
        if m:
            nError = int(m.group(1)) - 1
            sAction = m.group(2)                # "", ">", ">N" or "=text"
            cAction = sAction[0:1] or ">"
            if cAction == ">":
                # no suggestion number given -> first suggestion
                vSugg = int(sAction[1:]) - 1  if sAction[1:]  else 0
            else:
                vSugg = sAction[1:]
            return (nError, cAction, vSugg)

def main ():
    "launch the CLI (command line interface)"
    # Entry point: parses the command line, configures the grammar checker, then either processes
    # a file (batch or interactive correction) or runs the interactive pseudo-console.
    #
    # NOTE(review): this function looks damaged (statements seem to be missing in several places,
    # each flagged below with a NOTE). The code is left untouched; restore it from a trusted copy.
    #
    # NOTE(review): versions are compared as strings here: "3.10…" sorts before "3.7", so the test is
    # true on Python 3.10+. Comparing sys.version_info tuples would avoid that. Nothing visible stops
    # execution after the message — confirm whether a return is missing.
    if sys.version < "3.7":
        print("Python 3.7+ required")

    # command line definition
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-iff", "--interactive_file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (not with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-wss", "--with_spell_sugg", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-pdi", "--personal_dict", help="load personnal dictionary (JSON file)", type=str)
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-sug", "--suggest", help="get suggestions list for given word", type=str)
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()

    # grammar checker for French and its companion objects
    oGrammarChecker = grammalecte.GrammarChecker("fr")
    oSpellChecker = oGrammarChecker.getSpellChecker()
    oTextFormatter = oGrammarChecker.getTextFormatter()
    if xArgs.personal_dict:
        oJSON = loadDictionary(xArgs.personal_dict)
        if oJSON:
            # NOTE(review): this branch has no body; presumably the loaded dictionary should be
            # handed to the spell checker here — confirm against a trusted copy.

    # version banner (skipped in JSON mode)
    if not xArgs.json:
        echo("Python v" + sys.version)
        echo("Grammalecte v{}".format(oGrammarChecker.gce.version))

    # list options or rules
    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            # NOTE(review): empty branch; the call listing the options seems to be missing.
        if xArgs.list_rules:
            # "*" is the value stored when -lr is given without a pattern: no filter
            oGrammarChecker.gce.displayRules(None  if xArgs.list_rules == "*"  else xArgs.list_rules)
        # NOTE(review): nothing visible ends the program after listing — confirm whether an exit is missing.

    # spell suggestions
    if xArgs.suggest:
        for lSugg in oSpellChecker.suggest(xArgs.suggest):
            if xArgs.json:
                sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
                # NOTE(review): the next assignment overwrites the JSON text and sText is never
                # displayed; an else branch and an output call seem to be missing.
                sText = "Suggestions : " + " | ".join(lSugg)

    # disable options
    if not xArgs.json:
        # context is only meaningful with JSON output
        xArgs.context = False
    if xArgs.concat_lines:
        # the text formatter is not used together with line concatenation
        xArgs.textformatter = False

    # grammar options
    oGrammarChecker.gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        oGrammarChecker.gce.setOptions({ opt:True  for opt in xArgs.opt_on })
    if xArgs.opt_off:
        oGrammarChecker.gce.setOptions({ opt:False  for opt in xArgs.opt_off })

    # disable grammar rules
    if xArgs.rule_off:
        for sRule in xArgs.rule_off:
            # NOTE(review): empty loop body; the call deactivating <sRule> seems to be missing.

    if xArgs.file or xArgs.file_to_file:
        # file processing
        sFile = xArgs.file or xArgs.file_to_file
        # result file <name>.res.txt when --file_to_file is used or on Windows, otherwise console output
        hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n")  if xArgs.file_to_file or sys.platform == "win32"  else None
        # bComma: True once a first result has been written (JSON entries are comma-separated)
        bComma = False
        if xArgs.json:
            output('{ "grammalecte": "'+oGrammarChecker.gce.version+'", "lang": "'+oGrammarChecker.gce.lang+'", "data" : [\n', hDst)
        for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines):
            if xArgs.textformatter or xArgs.textformatteronly:
                sText = oTextFormatter.formatText(sText)
            if xArgs.textformatteronly:
                output(sText, hDst)
                # NOTE(review): execution falls through to grammar checking although the option is
                # documented as disabling it; a continue seems to be missing.
            if xArgs.json:
                sText = oGrammarChecker.getParagraphErrorsAsJSON(i, sText, bContext=xArgs.context, bEmptyIfNoErrors=xArgs.only_when_errors, \
                                                                bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter, lLineSet=lLineSet)
                # NOTE(review): the next call runs on the JSON text just produced; an else branch
                # seems to be missing before it.
                sText, _ = oGrammarChecker.getParagraphWithErrors(sText, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width)
            if sText:
                if xArgs.json and bComma:
                    output(",\n", hDst)
                output(sText, hDst)
                bComma = True
            if hDst:
                # progress indicator on the console while results go to the file
                echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
        # NOTE(review): hDst is not closed in the visible code.
    elif xArgs.interactive_file_to_file:
        # file processing: interactive mode
        sFile = xArgs.interactive_file_to_file
        hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n")
        for i, sText, lLineSet in generateParagraphFromFile(sFile, xArgs.concat_lines):
            if xArgs.textformatter:
                sText = oTextFormatter.formatText(sText)
            # loop on the same paragraph: it is re-analysed after each correction
            while True:
                sResult, lErrors = oGrammarChecker.getParagraphWithErrors(sText, bEmptyIfNoErrors=False, bSpellSugg=True, nWidth=xArgs.width)
                print("\n\n============================== Paragraph " + str(i) + " ==============================\n")
                # NOTE(review): sResult is never displayed in the visible code.
                vCommand = getCommand()
                if vCommand == "q":
                    # quit
                    # NOTE(review): empty branch; closing the result file and leaving seem to be missing.
                elif vCommand == "n":
                    # next paragraph
                    # NOTE(review): writing the paragraph to hDst and a break seem to be missing,
                    # as well as an else before the unpacking below.
                    nError, cAction, vSugg = vCommand
                    if 0 <= nError <= len(lErrors) - 1:
                        dErr = lErrors[nError]
                        if cAction == ">"  and  0 <= vSugg <= len(dErr["aSuggestions"]) - 1:
                            # replace the erroneous span with the chosen suggestion
                            sSugg = dErr["aSuggestions"][vSugg]
                            sText = sText[0:dErr["nStart"]] + sSugg + sText[dErr["nEnd"]:]
                        elif cAction == "=":
                            # replace the erroneous span with the text typed by the user
                            sText = sText[0:dErr["nStart"]] + vSugg + sText[dErr["nEnd"]:]
                            # NOTE(review): the two messages below are printed unconditionally as the
                            # code stands; each presumably belongs to a missing else branch.
                            print("Error. Action not possible.")
                        print("Error. This error doesn’t exist.")
        # NOTE(review): the pseudo-console below sits inside the interactive branch as the code
        # stands; an else seems to be missing before it.
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                # words analysis
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* " + sWord)
                        for sElem, aRes in oSpellChecker.analyze(sWord):
                            echo("  - " + sElem)
                            for sMorph, sMeaning in aRes:
                                echo("      {:<40}  {}".format(sMorph, sMeaning))
            elif sText.startswith("!"):
                # spelling suggestions
                for sWord in sText[1:].strip().split():
                    if sWord:
                        for lSugg in oSpellChecker.suggest(sWord):
                            echo(" | ".join(lSugg))
            elif sText.startswith(">"):
                # NOTE(review): empty branch; the call drawing the path of the word seems to be missing.
            elif sText.startswith("="):
                # search entries: "=flexion_pattern" or "=flexion_pattern=tags_pattern"
                sSearch = sText[1:].strip()
                if "=" in sSearch:
                    nCut = sSearch.find("=")
                    sFlexPattern = sSearch[0:nCut]
                    sTagsPattern = sSearch[nCut+1:]
                    # NOTE(review): the two assignments below overwrite the split just done; an else
                    # seems to be missing before them.
                    sFlexPattern = sSearch
                    sTagsPattern = ""
                # NOTE(review): the iterable expression of this loop is truncated.
                for aRes in, sTagsPattern):
                    echo("{:<30} {:<30} {}".format(*aRes))
            elif sText.startswith("≠"):
                # distance between every pair of the given words
                lWords = sText[1:].split("|")
                for s1, s2 in itertools.combinations(lWords, 2):
                    nDist = strt.distanceDamerauLevenshtein(s1, s2)
                    print(f"{s1} ≠ {s2}: {nDist}")
            elif sText.startswith("/o+ "):
                # activate options (unknown option names are ignored)
                oGrammarChecker.gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in oGrammarChecker.gce.getOptions() })
            elif sText.startswith("/o- "):
                # deactivate options (unknown option names are ignored)
                oGrammarChecker.gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in oGrammarChecker.gce.getOptions() })
            elif sText.startswith("/r- "):
                for sRule in sText[3:].strip().split():
                    # NOTE(review): empty loop body; the call deactivating <sRule> seems to be missing.
            elif sText.startswith("/r+ "):
                for sRule in sText[3:].strip().split():
                    # NOTE(review): empty loop body; the call reactivating <sRule> seems to be missing.
            elif sText in ("/debug", "/d"):
                xArgs.debug = not xArgs.debug
                echo("debug mode on"  if xArgs.debug  else "debug mode off")
            elif sText in ("/textformatter", "/tf"):
                xArgs.textformatter = not xArgs.textformatter
                # NOTE(review): the message is chosen from xArgs.debug although the flag just toggled
                # is xArgs.textformatter — looks like a copy/paste slip.
                echo("textformatter on"  if xArgs.debug  else "textformatter off")
            elif sText in ("/help", "/h"):
                # NOTE(review): empty branch; displaying the help text seems to be missing.
            elif sText in ("/lopt", "/lo"):
                # NOTE(review): empty branch; the call listing the options seems to be missing.
            elif sText.startswith("/lr"):
                # optional filter = everything after the first space
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip()  if " " in sText  else None
                # NOTE(review): sFilter is never used; the call listing the rules seems to be missing.
            elif sText in ("/quit", "/q"):
                # NOTE(review): empty branch; leaving the loop seems to be missing.
            elif sText.startswith("/rl"):
                # reload (todo)
            elif sText.startswith("$"):
                # show sentences and tokens of the text
                for sParagraph in txt.getParagraph(sText[1:]):
                    if xArgs.textformatter:
                        sParagraph = oTextFormatter.formatText(sParagraph)
                    lParagraphErrors, lSentences = oGrammarChecker.gce.parse(sParagraph, bDebug=xArgs.debug, bFullInfo=True)
                    #echo(txt.getReadableErrors(lParagraphErrors, xArgs.width))
                    for dSentence in lSentences:
                        echo("{nStart}:{nEnd}  <{sSentence}>".format(**dSentence))
                        for dToken in dSentence["lTokens"]:
                            if dToken["sType"] == "INFO" or "bMerged" in dToken:
                                # NOTE(review): empty branch; presumably such tokens are skipped — confirm.
                            # columns: position, "×" if flagged bToRemove, type, "!" for a WORD not flagged bValidToken, value, tags
                            echo("  {0[nStart]:>3}:{0[nEnd]:<3} {1} {0[sType]:<14} {2} {0[sValue]:<16} {3}".format(dToken, \
                                                                                                        "×" if dToken.get("bToRemove", False) else " ",
                                                                                                        "!" if dToken["sType"] == "WORD" and not dToken.get("bValidToken", False) else " ",
                                                                                                        " ".join(dToken.get("aTags", "")) ) )
                            if "lMorph" in dToken:
                                for sMorph, sLabel in zip(dToken["lMorph"], dToken["aLabels"]):
                                    echo("            {0:40}  {1}".format(sMorph, sLabel))
                            if "lSubTokens" in dToken:
                                for dSubToken in dToken["lSubTokens"]:
                                    if dSubToken["sValue"]:
                                        echo("              · {0:20}".format(dSubToken["sValue"]))
                                        for sMorph, sLabel in zip(dSubToken["lMorph"], dSubToken["aLabels"]):
                                            echo("                {0:40}  {1}".format(sMorph, sLabel))
                        #echo(txt.getReadableErrors(dSentence["lGrammarErrors"], xArgs.width))
                # NOTE(review): the grammar checking below sits inside the "$" branch as the code
                # stands; an else (default case: any other text) seems to be missing before it.
                if sText.startswith("TEST: "):
                    # strip the markup of a test line: prefix, {{ }} markers and the " ->> …" tail
                    sText = sText[6:]
                    sText = sText.replace("{{", "").replace("}}", "")
                    sText = re.sub(" ->> .*$", "", sText).rstrip()
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sParagraph = oTextFormatter.formatText(sParagraph)
                    sRes, _ = oGrammarChecker.getParagraphWithErrors(sParagraph, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width, bDebug=xArgs.debug)
                    if sRes:
                        echo("\n" + sRes)
                        # NOTE(review): "No error found." is printed right after the errors as the
                        # code stands; it presumably belongs to a missing else/elif branch.
                        echo("\nNo error found.")
            # ask for the next command
            sText = _getText(sInputText)

# run the CLI only when the file is executed as a script
if __name__ == '__main__':
    main()