Grammalecte: grammalecte-cli.py at [8a66c7c437]

File grammalecte-cli.py artifact 07800caa2b part of check-in 8a66c7c437

#!/usr/bin/env python3

import sys
import os.path
import argparse
import json

import grammalecte.fr as gce
import grammalecte.fr.lexicographe as lxg
import grammalecte.fr.textformatter as tf
import grammalecte.text as txt
import grammalecte.graphspell.tokenizer as tkz
from grammalecte.graphspell.echo import echo


_EXAMPLE = "Quoi ? Racontes ! Racontes-moi ! Bon sangg, parles ! Oui. Il y a des menteur partout. " \
           "Je suit sidéré par la brutales arrogance de cette homme-là. Quelle salopard ! Un escrocs de la pire espece. " \
           "Quant sera t’il châtiés pour ses mensonge ?             Merde ! J’en aie marre."

_HELP = """
    /help                       /h      show this text
    ?word1 [word2] ...                  words analysis
    !word                               suggestion
    >word                               draw path of word in the word graph
    =filter                             show all entries whose morphology fits to filter
    /lopt                       /lo     list options
    /+ option1 [option2] ...            activate grammar checking options
    /- option1 [option2] ...            deactivate grammar checking options
    /lrules [pattern]           /lr     list rules
    /--rule1 [rule2] ...                deactivate grammar checking rule
    /++rule1 [rule2] ...                reactivate grammar checking rule
    /quit                       /q      exit
"""


def _getText (sInputText):
    sText = input(sInputText)
    if sText == "*":
        return _EXAMPLE
    if sys.platform == "win32":
        # Apparently, the console transforms «’» in «'».
        # So we reverse it to avoid many useless warnings.
        sText = sText.replace("'", "’")
    return sText


def _getErrors (sText, oTokenizer, oDict, bContext=False, bSpellSugg=False, bDebug=False):
    "returns a tuple: (grammar errors, spelling errors)"
    aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
    aSpellErrs = []
    for dToken in oTokenizer.genTokens(sText):
        if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
            if bSpellSugg:
                dToken['aSuggestions'] = oDict.suggest(dToken['sValue'])
            aSpellErrs.append(dToken)
    return aGrammErrs, aSpellErrs


def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bSpellSugg, bDebug)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)


def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSugg=False, lLineSet=None, bReturnText=False):
    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bSpellSugg, bDebug)
    aGrammErrs = list(aGrammErrs)
    if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
        return ""
    if lLineSet:
        aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
        return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
    if bReturnText:
        return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
    return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)


def readfile (spf):
    "generator: returns file line by line"
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                yield sLine
    else:
        print("# Error: file not found.")


def readfileAndConcatLines (spf):
    "generator: returns text by list of lines not separated by an empty line"
    lLine = []
    for i, sLine in enumerate(readfile(spf), 1):
        if sLine.strip():
            lLine.append((i, sLine))
        elif lLine:
            yield lLine
            lLine = []
    if lLine:
        yield lLine


def output (sText, hDst=None):
    if not hDst:
        echo(sText, end="")
    else:
        hDst.write(sText)


def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
    xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
    xParser.add_argument("-wss", "--with_spell_sugg", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
    xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
    xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
    xParser.add_argument("-sug", "--suggest", help="get suggestions list for given word", type=str)
    xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
    xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
    xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
    xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
    xArgs = xParser.parse_args()

    gce.load()
    if not xArgs.json:
        echo("Grammalecte v{}".format(gce.version))
    oDict = gce.getDictionary()
    oTokenizer = tkz.Tokenizer("fr")
    oLexGraphe = lxg.Lexicographe(oDict)
    if xArgs.textformatter or xArgs.textformatteronly:
        oTF = tf.TextFormatter()

    if xArgs.list_options or xArgs.list_rules:
        if xArgs.list_options:
            gce.displayOptions("fr")
        if xArgs.list_rules:
            gce.displayRules(None  if xArgs.list_rules == "*"  else xArgs.list_rules)
        exit()

    if xArgs.suggest:
        lSugg = oDict.suggest(xArgs.suggest)
        if xArgs.json:
            sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
        else:
            sText = "Suggestions : " + " | ".join(lSugg)
        echo(sText)
        exit()

    if not xArgs.json:
        xArgs.context = False

    gce.setOptions({"html": True, "latex": True})
    if xArgs.opt_on:
        gce.setOptions({ opt:True  for opt in xArgs.opt_on  if opt in gce.getOptions() })
    if xArgs.opt_off:
        gce.setOptions({ opt:False  for opt in xArgs.opt_off  if opt in gce.getOptions() })

    if xArgs.rule_off:
        for sRule in xArgs.rule_off:
            gce.ignoreRule(sRule)

    sFile = xArgs.file or xArgs.file_to_file
    if sFile:
        # file processing
        hDst = open(sFile[:sFile.rfind(".")]+".res.txt", "w", encoding="utf-8", newline="\n")  if xArgs.file_to_file or sys.platform == "win32"  else None
        bComma = False
        if xArgs.json:
            output('{ "grammalecte": "'+gce.version+'", "lang": "'+gce.lang+'", "data" : [\n', hDst)
        if not xArgs.concat_lines:
            # pas de concaténation des lignes
            for i, sText in enumerate(readfile(sFile), 1):
                if xArgs.textformatter or xArgs.textformatteronly:
                    sText = oTF.formatText(sText)
                if xArgs.textformatteronly:
                    output(sText, hDst)
                else:
                    if xArgs.json:
                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, bReturnText=xArgs.textformatter)
                    else:
                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width)
                    if sText:
                        if xArgs.json and bComma:
                            output(",\n", hDst)
                        output(sText, hDst)
                        bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        else:
            # concaténation des lignes non séparées par une ligne vide
            for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
                sText, lLineSet = txt.createParagraphWithLines(lLine)
                if xArgs.json:
                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, lLineSet=lLineSet)
                else:
                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSugg=xArgs.with_spell_sugg, nWidth=xArgs.width)
                if sText:
                    if xArgs.json and bComma:
                        output(",\n", hDst)
                    output(sText, hDst)
                    bComma = True
                if hDst:
                    echo("§ %d\r" % i, end="", flush=True)
        if xArgs.json:
            output("\n]}\n", hDst)
    else:
        # pseudo-console
        sInputText = "\n~==========~ Enter your text [/h /q] ~==========~\n"
        sText = _getText(sInputText)
        while True:
            if sText.startswith("?"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo("* " + sWord)
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("!"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo(" | ".join(oDict.suggest(sWord)))
                        #echo(" | ".join(oDict.suggest2(sWord)))
            elif sText.startswith(">"):
                oDict.drawPath(sText[1:].strip())
            elif sText.startswith("="):
                for sRes in oDict.select(sText[1:].strip()):
                    echo(sRes)
            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):
                for sRule in sText[3:].strip().split():
                    gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
                xArgs.debug = not(xArgs.debug)
                echo("debug mode on"  if xArgs.debug  else "debug mode off")
            elif sText == "/textformatter" or sText == "/tf":
                xArgs.textformatter = not(xArgs.textformatter)
                echo("textformatter on"  if xArgs.debug  else "textformatter off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/lo":
                gce.displayOptions("fr")
            elif sText.startswith("/lr"):
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip()  if sText != "/lr" and sText != "/rules"  else None
                gce.displayRules(sFilter)
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sText = oTF.formatText(sText)
                    sRes = generateText(sText, oTokenizer, oDict, bDebug=xArgs.debug, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)


if __name__ == '__main__':
    main()
Grammalecte grammalecte-cli.py at [8a66c7c437]

File grammalecte-cli.py artifact 07800caa2b part of check-in 8a66c7c437