Changes In Branch rg Through [dc7c85fc4b] Excluding Merge-Ins
This is equivalent to a diff from cb3f319c08 to dc7c85fc4b
| 2018-08-06 | ||
| 20:11 | [core] gc engine: tag() check-in: 0187a02172 user: olr tags: core, rg | |
| 19:59 | [fr] conversion: regex rules -> graph rules check-in: dc7c85fc4b user: olr tags: fr, rg | |
| 16:27 | [fr] conversion: regex rules -> graph rules check-in: c1c1ac5e30 user: olr tags: fr, rg | |
| 2018-06-25 | ||
| 07:58 | [fr] faux positif: en tant que président du conseil (trailing spaces automatically removed) check-in: 37fb199673 user: olr tags: trunk, fr | |
| 2018-06-24 | ||
| 19:03 | merge trunk check-in: 099647c959 user: olr tags: rg | |
| 2018-06-22 | ||
| 07:46 | [cli] option to load personal dictionary check-in: cb3f319c08 user: olr tags: trunk, cli | |
| 2018-06-15 | ||
| 20:44 | [fr] faux positif: accord de laisser avec les pronoms sans impératif check-in: 24d41be12e user: olr tags: trunk, fr | |
Modified compile_rules.py from [1ea2b6d97a] to [e8250665d2].
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | 
import re
import traceback
import json
import compile_rules_js_convert as jsconv
dDEF = {}
lFUNCTIONS = []
aRULESET = set()     # set of rule-ids to check if there is several rules with the same id
nRULEWITHOUTNAME = 0
dJSREGEXES = {}
sWORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
sWORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower
def prepareFunction (s):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"isStart *\(\)", 'before("^ *$|, *$")', s)
    s = re.sub(r"isRealStart *\(\)", 'before("^ *$")', s)
    s = re.sub(r"isStart0 *\(\)", 'before0("^ *$|, *$")', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0("^ *$")', s)
    s = re.sub(r"isEnd *\(\)", 'after("^ *$|^,")', s)
    s = re.sub(r"isRealEnd *\(\)", 'after("^ *$")', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0("^ *$|^,")', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0("^ *$")', s)
 | > > > > > | | | | | | | | | | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | 
"""
Grammalecte: compile rules
"""
import re
import traceback
import json
import compile_rules_js_convert as jsconv
import compile_rules_graph as crg
dDEF = {}
lFUNCTIONS = []
aRULESET = set()     # set of rule-ids to check if there is several rules with the same id
nRULEWITHOUTNAME = 0
dJSREGEXES = {}
sWORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
sWORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower
def prepareFunction (s):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"isStart *\(\)", 'before("^ *$|, *$")', s)
    s = re.sub(r"isRealStart *\(\)", 'before("^ *$")', s)
    s = re.sub(r"isStart0 *\(\)", 'before0("^ *$|, *$")', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0("^ *$")', s)
    s = re.sub(r"isEnd *\(\)", 'after("^ *$|^,")', s)
    s = re.sub(r"isRealEnd *\(\)", 'after("^ *$")', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0("^ *$|^,")', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0("^ *$")', s)
    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(dTokenPos, m.start(\\2), m.group(\\2)', s)
    s = re.sub(r"define[(][\\](\d+)", 'define(dTokenPos, m.start(\\1)', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(][\\](\d+)", '\\1((m.start(\\2), m.group(\\2))', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(]", '\\1(dTokenPos, ', s)
    s = re.sub(r"(sugg\w+|switch\w+)\(@", '\\1(m.group(i[4])', s)
    s = re.sub(r"word\(\s*1\b", 'nextword1(s, m.end()', s)                                  # word(1)
    s = re.sub(r"word\(\s*-1\b", 'prevword1(s, m.start()', s)                               # word(-1)
    s = re.sub(r"word\(\s*(\d)", 'nextword(s, m.end(), \\1', s)                             # word(n)
    s = re.sub(r"word\(\s*-(\d)", 'prevword(s, m.start(), \\1', s)                          # word(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dTokenPos, s[:m.start()], 0, ', s)           # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dTokenPos, s[m.end():], m.end(), ', s)        # after_chk1(s)
    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dTokenPos, s, 0, ', s)                     # textarea_chk1(s)
    s = re.sub(r"/0", 'sx[m.start():m.end()]', s)                                           # /0
    s = re.sub(r"before0\(\s*", 'look(sx[:m.start()], ', s)                                 # before0(s)
    s = re.sub(r"after0\(\s*", 'look(sx[m.end():], ', s)                                    # after0(s)
    s = re.sub(r"textarea0\(\s*", 'look(sx, ', s)                                           # textarea0(s)
    s = re.sub(r"before0_chk1\(\s*", 'look_chk1(dTokenPos, sx[:m.start()], 0, ', s)         # before0_chk1(s)
    s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dTokenPos, sx[m.end():], m.end(), ', s)      # after0_chk1(s)
    s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dTokenPos, sx, 0, ', s)                   # textarea0_chk1(s)
    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dTokenPos, s[m.end():], m.end())', s)        # isEndOfNG(s)
    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dTokenPos, s[m.end():], m.end())', s)  # isNextNotCOD(s)
    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dTokenPos, s[m.end():], m.end())', s)      # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
    return s
def uppercase (s, sLang):
    "(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
 | 
| ︙ | ︙ | |||
| 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | 
            nState = 4
        elif nState == 4:
            nState = 0
    return sUp
def countGroupInRegex (sRegex):
    try:
        return re.compile(sRegex).groups
    except:
        traceback.print_exc()
        print(sRegex)
    return 0
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global dJSREGEXES
    global nRULEWITHOUTNAME
 | > < > > > > > > > > > | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | 
            nState = 4
        elif nState == 4:
            nState = 0
    return sUp
def countGroupInRegex (sRegex):
    "returns the number of groups in <sRegex>"
    try:
        return re.compile(sRegex).groups
    except:
        traceback.print_exc()
        print(sRegex)
    return 0
def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
    "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
    global dJSREGEXES
    global nRULEWITHOUTNAME
    sLineId = str(nIdLine) + ("p" if bParagraph else "s")
    sRuleId = sLineId
    #### GRAPH CALL
    if s.startswith("@@@@"):
        if bParagraph:
            print("Error. Graph call can be made only after the first pass (sentence by sentence)")
            exit()
        return ["@@@@", s[4:], sLineId]
    #### OPTIONS
    sOption = False         # False or [a-z0-9]+ name
    nPriority = 4           # Default is 4, value must be between 0 and 9
    tGroups = None          # code for groups positioning (only useful for JavaScript)
    cCaseMode = 'i'         # i: case insensitive,  s: case sensitive,  u: uppercasing allowed
    cWordLimitLeft = '['    # [: word limit, <: no specific limit
    cWordLimitRight = ']'   # ]: word limit, >: no specific limit
    m = re.match("^__(?P<borders_and_case>[\\[<]\\w[\\]>])(?P<option>/[a-zA-Z0-9]+|)(?P<ruleid>\\(\\w+\\)|)(?P<priority>![0-9]|)__ *", s)
    if m:
        cWordLimitLeft = m.group('borders_and_case')[0]
        cCaseMode = m.group('borders_and_case')[1]
        cWordLimitRight = m.group('borders_and_case')[2]
        sOption = m.group('option')[1:]  if m.group('option')  else False
        if m.group('ruleid'):
            sRuleId =  m.group('ruleid')[1:-1]
 | 
| ︙ | ︙ | |||
| 145 146 147 148 149 150 151 | 
    #### REGEX TRIGGER
    i = s.find(" <<-")
    if i == -1:
        print("# Error: no condition at line " + sLineId)
        return None
    sRegex = s[:i].strip()
    s = s[i+4:]
 | | | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | 
    #### REGEX TRIGGER
    i = s.find(" <<-")
    if i == -1:
        print("# Error: no condition at line " + sLineId)
        return None
    sRegex = s[:i].strip()
    s = s[i+4:]
    # JS groups positioning codes
    m = re.search("@@\\S+", sRegex)
    if m:
        tGroups = jsconv.groupsPositioningCodeToList(sRegex[m.start()+2:])
        sRegex = sRegex[:m.start()].strip()
    # JS regex
    m = re.search("<js>.+</js>i?", sRegex)
 | 
| ︙ | ︙ | |||
| 200 201 202 203 204 205 206 | 
        sRegex = sRegex.replace("(?i)", "")
        sRegex = uppercase(sRegex, sLang)
    else:
        print("# Unknown case mode [" + cCaseMode + "] at line " + sLineId)
    ## check regex
    try:
 | | | > > > > > > > > > > > > > > > < < | | < < | | > | < < < | < < | < < | > < < < < < | | > > > | | | > < < | | < < | | | | > | | < < < > > | | > | 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 | 
        sRegex = sRegex.replace("(?i)", "")
        sRegex = uppercase(sRegex, sLang)
    else:
        print("# Unknown case mode [" + cCaseMode + "] at line " + sLineId)
    ## check regex
    try:
        re.compile(sRegex)
    except:
        print("# Regex error at line ", nIdLine)
        print(sRegex)
        traceback.print_exc()
        return None
    ## groups in non grouping parenthesis
    for x in re.finditer(r"\(\?:[^)]*\([\[\w -]", sRegex):
        print("# Warning: groups inside non grouping parenthesis in regex at line " + sLineId)
    #### PARSE ACTIONS
    lActions = []
    nAction = 1
    for sAction in s.split(" <<- "):
        t = createAction(sRuleId + "_" + str(nAction), sAction, nGroup)
        nAction += 1
        if t:
            lActions.append(t)
    if not lActions:
        return None
    return [sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, tGroups]
def checkReferenceNumbers (sText, sActionId, nToken):
    "check if token references in <sText> greater than <nToken> (debugging)"
    for x in re.finditer(r"\\(\d+)", sText):
        if int(x.group(1)) > nToken:
            print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
            print(sText)
def checkIfThereIsCode (sText, sActionId):
    "check if there is code in <sText> (debugging)"
    if re.search("[.]\\w+[(]|sugg\\w+[(]|\\([0-9]|\\[[0-9]", sText):
        print("# Warning at line " + sActionId + ":  This message looks like code. Line should probably begin with =")
        print(sText)
def createAction (sIdAction, sAction, nGroup):
    "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])"
    m = re.search(r"([-~=>])(\d*|)>>", sAction)
    if not m:
        print("# No action at line " + sIdAction)
        return None
    #### CONDITION
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        sCondition = prepareFunction(sCondition)
        lFUNCTIONS.append(("_c_"+sIdAction, sCondition))
        checkReferenceNumbers(sCondition, sIdAction, nGroup)
        if ".match" in sCondition:
            print("# Error. JS compatibility. Don't use .match() in condition, use .search()")
        sCondition = "_c_"+sIdAction
    else:
        sCondition = None
    #### iGroup / positioning
    iGroup = int(m.group(2)) if m.group(2) else 0
    if iGroup > nGroup:
        print("# Selected group > group number in regex at line " + sIdAction)
    #### ACTION
    sAction = sAction[m.end():].strip()
    cAction = m.group(1)
    if cAction == "-":
        ## error
        iMsg = sAction.find(" # ")
        if iMsg == -1:
            sMsg = "# Error. Error message not found."
            sURL = ""
            print(sMsg + " Action id: " + sIdAction)
        else:
            sMsg = sAction[iMsg+3:].strip()
            sAction = sAction[:iMsg].strip()
            sURL = ""
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            checkReferenceNumbers(sMsg, sIdAction, nGroup)
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                lFUNCTIONS.append(("_m_"+sIdAction, sMsg))
                sMsg = "=_m_"+sIdAction
            else:
                checkIfThereIsCode(sMsg, sIdAction)
    checkReferenceNumbers(sAction, sIdAction, nGroup)
    if sAction[0:1] == "=" or cAction == "=":
        sAction = prepareFunction(sAction)
        sAction = sAction.replace("m.group(i[4])", "m.group("+str(iGroup)+")")
    else:
        checkIfThereIsCode(sAction, sIdAction)
    if cAction == ">":
        ## no action, break loop if condition is False
        return [sCondition, cAction, ""]
    if not sAction:
        print("# Error in action at line " + sIdAction + ":  This action is empty.")
        return None
    if cAction == "-":
        ## error detected --> suggestion
        if sAction[0:1] == "=":
            lFUNCTIONS.append(("_s_"+sIdAction, sAction[1:]))
            sAction = "=_s_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sIdAction + ":  the message is empty.")
        return [sCondition, cAction, sAction, iGroup, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if sAction[0:1] == "=":
            lFUNCTIONS.append(("_p_"+sIdAction, sAction[1:]))
            sAction = "=_p_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iGroup]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if "define" in sAction and not re.search(r"define\(dTokenPos, *m\.start.*, \[.*\] *\)", sAction):
            print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
            print(sAction)
        lFUNCTIONS.append(("_d_"+sIdAction, sAction))
        sAction = "_d_"+sIdAction
        return [sCondition, cAction, sAction]
    else:
        print("# Unknown action at line " + sIdAction)
        return None
def _calcRulesStats (lRules):
    "count rules and actions"
    d = {'=':0, '~': 0, '-': 0, '>': 0}
    for aRule in lRules:
        if aRule[0] != "@@@@":
            for aAction in aRule[6]:
                d[aAction[1]] = d[aAction[1]] + 1
    return (d, len(lRules))
def displayStats (lParagraphRules, lSentenceRules):
    "display rules numbers"
    print("  {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
    d, nRule = _calcRulesStats(lParagraphRules)
    print("§ {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
    d, nRule = _calcRulesStats(lSentenceRules)
    print("s {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
 | 
| ︙ | ︙ | |||
| 389 390 391 392 393 394 395 | 
            m = re.match("OPTGROUP/([a-z0-9]+):(.+)$", sLine)
            lStructOpt.append( (m.group(1), list(map(str.split, m.group(2).split(",")))) )
        elif sLine.startswith("OPTSOFTWARE:"):
            lOpt = [ [s, {}]  for s in sLine[12:].strip().split() ]  # don’t use tuples (s, {}), because unknown to JS
        elif sLine.startswith("OPT/"):
            m = re.match("OPT/([a-z0-9]+):(.+)$", sLine)
            for i, sOpt in enumerate(m.group(2).split()):
 | | | 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | 
            m = re.match("OPTGROUP/([a-z0-9]+):(.+)$", sLine)
            lStructOpt.append( (m.group(1), list(map(str.split, m.group(2).split(",")))) )
        elif sLine.startswith("OPTSOFTWARE:"):
            lOpt = [ [s, {}]  for s in sLine[12:].strip().split() ]  # don’t use tuples (s, {}), because unknown to JS
        elif sLine.startswith("OPT/"):
            m = re.match("OPT/([a-z0-9]+):(.+)$", sLine)
            for i, sOpt in enumerate(m.group(2).split()):
                lOpt[i][1][m.group(1)] = eval(sOpt)
        elif sLine.startswith("OPTPRIORITY/"):
            m = re.match("OPTPRIORITY/([a-z0-9]+): *([0-9])$", sLine)
            dOptPriority[m.group(1)] = int(m.group(2))
        elif sLine.startswith("OPTLANG/"):
            m = re.match("OPTLANG/([a-z][a-z](?:_[A-Z][A-Z]|)):(.+)$", sLine)
            sLang = m.group(1)[:2]
            dOptLabel[sLang] = { "__optiontitle__": m.group(2).strip() }
 | 
| ︙ | ︙ | |||
| 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 | 
    print("  options defined for: " + ", ".join([ t[0] for t in lOpt ]))
    dOptions = { "lStructOpt": lStructOpt, "dOptLabel": dOptLabel, "sDefaultUILang": sDefaultUILang }
    dOptions.update({ "dOpt"+k: v  for k, v in lOpt })
    return dOptions, dOptPriority
def printBookmark (nLevel, sComment, nLine):
    print("  {:>6}:  {}".format(nLine, "  " * nLevel + sComment))
def make (spLang, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here
    print("> read rules file...")
    try:
        lRules = open(spLang + "/rules.grx", 'r', encoding="utf-8").readlines()
    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
 | > < < | | > > | < | < < < > > > < < | > | > > > > > > > > > > > > > > > > > > > > > > | > | 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 | 
    print("  options defined for: " + ", ".join([ t[0] for t in lOpt ]))
    dOptions = { "lStructOpt": lStructOpt, "dOptLabel": dOptLabel, "sDefaultUILang": sDefaultUILang }
    dOptions.update({ "dOpt"+k: v  for k, v in lOpt })
    return dOptions, dOptPriority
def printBookmark (nLevel, sComment, nLine):
    "print bookmark within the rules file"
    print("  {:>6}:  {}".format(nLine, "  " * nLevel + sComment))
def make (spLang, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here
    print("> read rules file...")
    try:
        lRules = open(spLang + "/rules.grx", 'r', encoding="utf-8").readlines()
    except:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    lRuleLine = []
    lTest = []
    lOpt = []
    bGraph = False
    lGraphRule = []
    for i, sLine in enumerate(lRules, 1):
        if sLine.startswith('#END'):
            # arbitrary end
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            # comment
            pass
        elif sLine.startswith("DEF:"):
            # definition
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            # test
            lTest.append("{:<8}".format(i) + "  " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            # todo
            pass
        elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
            # options
            lOpt.append(sLine)
        elif sLine.startswith("!!"):
            # bookmark
            m = re.match("!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:-3].strip(), i)
        # Graph rules
        elif sLine.startswith("@@@@GRAPH:"):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+)", sLine.strip())
            if m:
                printBookmark(1, "____ GRAPH: " + m.group(1) + " ____", i)
                lRuleLine.append([i, "@@@@"+m.group(1)])
                bGraph = True
            lGraphRule.append([i, sLine])
            bGraph = True
        elif sLine.startswith("@@@@END_GRAPH"):
            #lGraphRule.append([i, sLine])
            bGraph = False
        elif re.match("@@@@ *$", sLine):
            pass
        elif bGraph:
            lGraphRule.append([i, sLine])
        # Regex rules
        elif re.match("[  \t]*$", sLine):
            # empty line
            pass
        elif sLine.startswith(("    ", "\t")):
            # rule (continuation)
            lRuleLine[-1][1] += " " + sLine.strip()
        else:
            # new rule
            lRuleLine.append([i, sLine.strip()])
    # generating options files
    print("  parsing options...")
    try:
        dOptions, dOptPriority = prepareOptions(lOpt)
    except:
 | 
| ︙ | ︙ | |||
| 513 514 515 516 517 518 519 | 
                        lSentenceRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT))
    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
 | < | | | | | | | | | | | | | | | | > > > > | | 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 | 
                        lSentenceRulesJS.append(jsconv.pyRuleToJS(aRule, dJSREGEXES, sWORDLIMITLEFT))
    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("_c_"): # condition
            sParams = "s, sx, m, dTokenPos, sCountry, bCondMemo"
        elif sFuncName.startswith("_m_"): # message
            sParams = "s, m"
        elif sFuncName.startswith("_s_"): # suggestion
            sParams = "s, m"
        elif sFuncName.startswith("_p_"): # preprocessor
            sParams = "s, m"
        elif sFuncName.startswith("_d_"): # disambiguator
            sParams = "s, m, dTokenPos"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue
        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
        sPyCallables += "    return " + sReturn + "\n"
        sJSCallables += "    {}: function ({})".format(sFuncName, sParams) + " {\n"
        sJSCallables += "        return " + jsconv.py2js(sReturn) + ";\n"
        sJSCallables += "    },\n"
    sJSCallables += "}\n"
    displayStats(lParagraphRules, lSentenceRules)
    print("Unnamed rules: " + str(nRULEWITHOUTNAME))
    dVars = {   "callables": sPyCallables,
                "callablesJS": sJSCallables,
                "gctests": sGCTests,
                "gctestsJS": sGCTestsJS,
                "paragraph_rules": mergeRulesByOption(lParagraphRules),
                "sentence_rules": mergeRulesByOption(lSentenceRules),
                "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
                "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
    dVars.update(dOptions)
    # compile graph rules
    dVars2 = crg.make(lGraphRule, dDEF, sLang, dOptPriority, bJavaScript)
    dVars.update(dVars2)
    return dVars
 | 
Added compile_rules_graph.py version [47c53113f4].
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 | 
"""
Grammalecte: compile rules
Create a Direct Acyclic Rule Graphs (DARGs)
"""
import re
import traceback
import json
import darg
dACTIONS = {}
dFUNCTIONS = {}
def prepareFunction (s):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = s.replace("sContext", "_sAppContext")
    s = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', s)
    s = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', s)
    s = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', s)
    s = re.sub(r"(select|exclude|define|define_from)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', s)
    s = re.sub(r"space_after[(][\\]-(\d+)", 'g_space_between_tokens(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', s)
    s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', s)
    s = re.sub(r"analyse_with_next[(][\\]-(\d+)", 'g_merged_analyse(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"(morph|analyse|value)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)', s)          # next token
    s = re.sub(r"(morph|analyse|value)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)', s)      # previous token
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)
    s = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                 # after(s)
    s = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', s)        # before0(s)
    s = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                # after0(s)
    s = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', s)
    s = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', s)
    return s
def genTokenLines (sTokenLine, dDef):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
    for sToken in lToken:
        # optional token?
        bNullPossible = sToken.startswith("?") and sToken.endswith("¿")
        if bNullPossible:
            sToken = sToken[1:-1]
        # token with definition?
        if sToken.startswith("({") and sToken.endswith("})") and sToken[1:-1] in dDef:
            sToken = "(" + dDef[sToken[1:-1]] + ")"
        elif sToken.startswith("{") and sToken.endswith("}") and sToken in dDef:
            sToken = dDef[sToken]
        if ( (sToken.startswith("[") and sToken.endswith("]")) or (sToken.startswith("([") and sToken.endswith("])")) ):
            # multiple token
            bSelectedGroup = sToken.startswith("(") and sToken.endswith(")")
            if bSelectedGroup:
                sToken = sToken[1:-1]
            lNewToken = sToken[1:-1].split("|")
            if not lTokenLines:
                lTokenLines = [ ["("+s+")"]  for s  in lNewToken ]  if bSelectedGroup  else [ [s]  for s  in lNewToken ]
                if bNullPossible:
                    lTokenLines.extend([ []  for i  in range(len(lNewToken)+1) ])
            else:
                lNewTemp = []
                if bNullPossible:
                    for aRule in lTokenLines:
                        for sElem in lNewToken:
                            aNewRule = list(aRule)
                            aNewRule.append(sElem)
                            lNewTemp.append(aNewRule)
                else:
                    sElem1 = lNewToken.pop(0)
                    for aRule in lTokenLines:
                        for sElem in lNewToken:
                            aNewRule = list(aRule)
                            aNewRule.append("(" + sElem + ")"  if bSelectedGroup  else sElem)
                            lNewTemp.append(aNewRule)
                        aRule.append("(" + sElem1 + ")"  if bSelectedGroup  else sElem1)
                lTokenLines.extend(lNewTemp)
        else:
            # simple token
            if not lTokenLines:
                lTokenLines = [[sToken], []]  if bNullPossible  else [[sToken]]
            else:
                if bNullPossible:
                    lNewTemp = []
                    for aRule in lTokenLines:
                        lNew = list(aRule)
                        lNew.append(sToken)
                        lNewTemp.append(lNew)
                    lTokenLines.extend(lNewTemp)
                else:
                    for aRule in lTokenLines:
                        aRule.append(sToken)
    for aRule in lTokenLines:
        yield aRule
def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dOptPriority, dDef):
    "generator: create rule as list"
    # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
    for lToken in genTokenLines(sTokenLine, dDef):
        # Calculate positions
        dPos = {}   # key: iGroup, value: iToken
        iGroup = 0
        #if iLine == 3971: # debug
        #    print(lToken)
        for i, sToken in enumerate(lToken):
            if sToken.startswith("(") and sToken.endswith(")"):
                lToken[i] = sToken[1:-1]
                iGroup += 1
                dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)
        # Parse actions
        for iAction, sAction in enumerate(sActions.split(" <<- ")):
            sAction = sAction.strip()
            if sAction:
                sActionId = sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) + "_" + str(len(lToken))
                aAction = createAction(sActionId, sAction, nPriority, dOptPriority, len(lToken), dPos)
                if aAction:
                    dACTIONS[sActionId] = aAction
                    lResult = list(lToken)
                    lResult.extend(["##"+str(iLine), sActionId])
                    yield lResult
                else:
                    print(" # Error on action at line:", iLine)
def changeReferenceToken (sText, dPos):
    "change group reference in <sText> with values in <dPos>"
    for i in range(len(dPos), 0, -1):
        sText = sText.replace("\\"+str(i), "\\"+str(dPos[i]))
    return sText
def checkTokenNumbers (sText, sActionId, nToken):
    "check if token references in <sText> greater than <nToken> (debugging)"
    for x in re.finditer(r"\\(\d+)", sText):
        if int(x.group(1)) > nToken:
            print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
            print(sText)
def checkIfThereIsCode (sText, sActionId):
    "check if there is code in <sText> (debugging)"
    if re.search("[.]\\w+[(]|sugg\\w+[(]|\\([0-9]|\\[[0-9]", sText):
        print("# Warning at line " + sActionId + ":  This message looks like code. Line should probably begin with =")
        print(sText)
def createAction (sActionId, sAction, nPriority, dOptPriority, nToken, dPos):
    "create action rule as a list"
    # Option
    sOption = False
    m = re.match("/(\\w+)/", sAction)
    if m:
        sOption = m.group(1)
        sAction = sAction[m.end():].strip()
    if nPriority == -1:
        nPriority = dOptPriority.get(sOption, 4)
    # valid action?
    m = re.search(r"(?P<action>[-~=/%>])(?P<start>-?\d+\.?|)(?P<end>:\.?-?\d+|)(?P<casing>:|)>>", sAction)
    if not m:
        print(" # Error. No action found at: ", sActionId)
        return None
    # Condition
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        sCondition = changeReferenceToken(sCondition, dPos)
        sCondition = prepareFunction(sCondition)
        dFUNCTIONS["_g_c_"+sActionId] = sCondition
        sCondition = "_g_c_"+sActionId
    else:
        sCondition = ""
    # Case sensitivity
    bCaseSensitivity = False if m.group("casing") == ":" else True
    # Action
    cAction = m.group("action")
    sAction = sAction[m.end():].strip()
    sAction = changeReferenceToken(sAction, dPos)
    # target
    cStartLimit = "<"
    cEndLimit = ">"
    if not m.group("start"):
        iStartAction = 1
        iEndAction = 0
    else:
        if cAction != "-" and (m.group("start").endswith(".") or m.group("end").startswith(":.")):
            print(" # Error. Wrong selection on tokens.", sActionId)
            return None
        if m.group("start").endswith("."):
            cStartLimit = ">"
        iStartAction = int(m.group("start").rstrip("."))
        if not m.group("end"):
            iEndAction = iStartAction
        else:
            if m.group("end").startswith(":."):
                cEndLimit = "<"
            iEndAction = int(m.group("end").lstrip(":."))
    if dPos and m.group("start"):
        try:
            iStartAction = dPos.get(iStartAction, iStartAction)
            if iEndAction:
                iEndAction = dPos.get(iEndAction, iEndAction)
        except:
            print("# Error. Wrong groups in: " + sActionId)
            print("  iStartAction:", iStartAction, "iEndAction:", iEndAction)
            print(" ", dPos)
    if iStartAction < 0:
        iStartAction += 1
    if iEndAction < 0:
        iEndAction += 1
    if cAction == "-":
        ## error
        iMsg = sAction.find(" # ")
        if iMsg == -1:
            sMsg = "# Error. Error message not found."
            sURL = ""
            print(sMsg + " Action id: " + sActionId)
        else:
            sMsg = sAction[iMsg+3:].strip()
            sAction = sAction[:iMsg].strip()
            sURL = ""
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            checkTokenNumbers(sMsg, sActionId, nToken)
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                dFUNCTIONS["g_m_"+sActionId] = sMsg
                sMsg = "=g_m_"+sActionId
            else:
                checkIfThereIsCode(sMsg, sActionId)
    # checking consistancy
    checkTokenNumbers(sAction, sActionId, nToken)
    if cAction == ">":
        ## no action, break loop if condition is False
        return [sOption, sCondition, cAction, ""]
    if not sAction:
        print("# Error in action at line " + sActionId + ":  This action is empty.")
    if sAction[0:1] != "=" and cAction != "=":
        checkIfThereIsCode(sAction, sActionId)
    if cAction == "-":
        ## error detected --> suggestion
        if sAction[0:1] == "=":
            sAction = prepareFunction(sAction)
            dFUNCTIONS["_g_s_"+sActionId] = sAction[1:]
            sAction = "=_g_s_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sActionId + ":  The message is empty.")
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction, cStartLimit, cEndLimit, bCaseSensitivity, nPriority, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if sAction[0:1] == "=":
            sAction = prepareFunction(sAction)
            dFUNCTIONS["_g_p_"+sActionId] = sAction[1:]
            sAction = "=_g_p_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction, bCaseSensitivity]
    elif cAction == "%" or cAction == "/":
        ## tags
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if "define(" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings")
        sAction = prepareFunction(sAction)
        dFUNCTIONS["_g_d_"+sActionId] = sAction
        sAction = "_g_d_"+sActionId
        return [sOption, sCondition, cAction, sAction]
    else:
        print(" # Unknown action.", sActionId)
        return None
def make (lRule, dDef, sLang, dOptPriority, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print("  parsing rules...")
    lTokenLine = []
    sActions = ""
    nPriority = -1
    dAllGraph = {}
    sGraphName = ""
    iActionBlock = 0
    for i, sLine in lRule:
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("Error. Tabulation at line: ", i)
            exit()
        elif sLine.startswith("@@@@GRAPH: "):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+)", sLine.strip())
            if m:
                sGraphName = m.group(1)
                if sGraphName in dAllGraph:
                    print("Error. Graph name " + sGraphName + " already exists.")
                    exit()
                dAllGraph[sGraphName] = []
            else:
                print("Error. Graph name not found at line", i)
                exit()
        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                iActionBlock = 1
                nPriority = int(m.group(2)[1:]) if m.group(2)  else -1
            else:
                print("Syntax error in rule group: ", sLine, " -- line:", i)
                exit()
        elif re.search("^    +<<- ", sLine) or sLine.startswith("        ") \
                or re.search("^    +#", sLine) or re.search(r"^    [-~=>/](?:\d\.?(?::\.?\d+|)|)>> ", sLine) :
            # actions
            sActions += " " + sLine.strip()
        elif re.match("[  ]*$", sLine):
            # empty line to end merging
            if not lTokenLine:
                continue
            if not sActions:
                print("Error. No action found at line:", i)
                exit()
            if not sGraphName:
                print("Error. All rules must belong to a named graph. Line: ", i)
                exit()
            for j, sTokenLine in lTokenLine:
                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, iActionBlock, sActions, nPriority))
            lTokenLine.clear()
            sActions = ""
            iActionBlock += 1
        elif sLine.startswith(("    ")):
            # tokens
            lTokenLine.append([i, sLine.strip()])
        else:
            print("Unknown line:")
            print(sLine)
    # processing rules
    print("  preparing rules...")
    for sGraphName, lRuleLine in dAllGraph.items():
        lPreparedRule = []
        for i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority in lRuleLine:
            for lRule in createRule(i, sRuleGroup, sTokenLine, iActionBlock, sActions, nPriority, dOptPriority, dDef):
                lPreparedRule.append(lRule)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, sLang)
        dAllGraph[sGraphName] = oDARG.createGraph()
        # Debugging
        if False:
            print("\nRULES:")
            for e in lPreparedRule:
                if e[-2] == "##2211":
                    print(e)
        if False:
            print("\nGRAPH:", sGraphName)
            for k, v in dAllGraph[sGraphName].items():
                print(k, "\t", v)
    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in dFUNCTIONS.items():
        if sFuncName.startswith("_g_c_"): # condition
            sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_s_"): # suggestion
            sParams = "lToken, nTokenOffset, nLastToken"
        elif sFuncName.startswith("_g_p_"): # preprocessor
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_d_"): # disambiguator
            sParams = "lToken, nTokenOffset"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue
        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
        sPyCallables += "    return " + sReturn + "\n"
        #sJSCallables += "    {}: function ({})".format(sFuncName, sParams) + " {\n"
        #sJSCallables += "        return " + jsconv.py2js(sReturn) + ";\n"
        #sJSCallables += "    },\n"
    #sJSCallables += "}\n"
    # Debugging
    if False:
        print("\nActions:")
        for sActionName, aAction in dACTIONS.items():
            print(sActionName, aAction)
        print("\nFunctions:")
        print(sPyCallables)
    # Result
    return {
        "graph_callables": sPyCallables,
        "rules_graphs": dAllGraph,
        "rules_actions": dACTIONS
    }
 | 
Modified compile_rules_js_convert.py from [5ad87f3f46] to [9aa0239064].
| 
 | 
 | > | > | 1 2 3 4 5 6 7 8 9 10 | """ Convert Python code and regexes to JavaScript code """ import copy import re import json def py2js (sCode): | 
| ︙ | ︙ | |||
| 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | 
        sRegex = sRegex + "i"
    if not lNegLookBeforeRegex:
        lNegLookBeforeRegex = None
    return (sRegex, lNegLookBeforeRegex)
def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
    lRuleJS = copy.deepcopy(lRule)
    del lRule[-1] # tGroups positioning codes are useless for Python
    # error messages
    for aAction in lRuleJS[6]:
        if aAction[1] == "-":
            aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
            aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :")
    # js regexes
    lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT)
    lRuleJS.append(lNegLookBehindRegex)
    return lRuleJS
def writeRulesToJSArray (lRules):
    sArray = "[\n"
    for sOption, aRuleGroup in lRules:
 | > > > > > > | | | | | | | | | | | > > > > > > | 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | 
        sRegex = sRegex + "i"
    if not lNegLookBeforeRegex:
        lNegLookBeforeRegex = None
    return (sRegex, lNegLookBeforeRegex)
def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
    "modify Python rules -> JS rules"
    lRuleJS = copy.deepcopy(lRule)
    # graph rules
    if lRuleJS[0] == "@@@@":
        return lRuleJS
    del lRule[-1] # tGroups positioning codes are useless for Python
    # error messages
    for aAction in lRuleJS[6]:
        if aAction[1] == "-":
            aAction[2] = aAction[2].replace(" ", " ") # nbsp --> nnbsp
            aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »").replace(" :", " :").replace(" :", " :")
    # js regexes
    lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT)
    lRuleJS.append(lNegLookBehindRegex)
    return lRuleJS
def writeRulesToJSArray (lRules):
    "create rules as a string of arrays (to be bundled in a JSON string)"
    sArray = "[\n"
    for sOption, aRuleGroup in lRules:
        if sOption != "@@@@":
            sArray += '  ["' + sOption + '", [\n'  if sOption  else  "  [false, [\n"
            for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
                sArray += '    [' + sRegex + ", "
                sArray += "true, " if bCaseInsensitive  else "false, "
                sArray += '"' + sLineId + '", '
                sArray += '"' + sRuleId + '", '
                sArray += str(nPriority) + ", "
                sArray += json.dumps(lActions, ensure_ascii=False) + ", "
                sArray += json.dumps(aGroups, ensure_ascii=False) + ", "
                sArray += json.dumps(aNegLookBehindRegex, ensure_ascii=False) + "],\n"
            sArray += "  ]],\n"
        else:
            sArray += '  ["' + sOption + '", [\n'
            for sGraphName, sLineId in aRuleGroup:
                sArray += '    ["' + sGraphName + '", "' + sLineId + '"],\n"'
            sArray += "  ]],\n"
    sArray += "]"
    return sArray
def groupsPositioningCodeToList (sGroupsPositioningCode):
    "convert <sGroupsPositioningCode> to a list of codes (numbers or strings)"
    if not sGroupsPositioningCode:
        return None
    return [ int(sCode)  if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit())  else sCode \
             for sCode in sGroupsPositioningCode.split(",") ]
 | 
Added darg.py version [11706e17f5].
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | 
#!python3
"""
RULE GRAPH BUILDER
"""
# by Olivier R.
# License: MPL 2
import re
import traceback
class DARG:
    """DIRECT ACYCLIC RULE GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
    def __init__ (self, lRule, sLangCode):
        print(" > Direct Acyclic Rule Graph (DARG)", end=" ")
        # Preparing DARG
        self.sLangCode = sLangCode
        self.nRule = len(lRule)
        self.aPreviousRule = []
        Node.resetNextId()
        self.oRoot = Node()
        self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
        self.lMinimizedNodes = {}  # list of unique nodes that have been checked for duplication.
        self.nNode = 0
        self.nArc = 0
        # build
        lRule.sort()
        for aRule in lRule:
            self.insert(aRule)
        self.finish()
        self.countNodes()
        self.countArcs()
        self.displayInfo()
    # BUILD DARG
    def insert (self, aRule):
        "insert a new rule (tokens must be inserted in order)"
        if aRule < self.aPreviousRule:
            exit("# Error: tokens must be inserted in order.")
        # find common prefix between word and previous word
        nCommonPrefix = 0
        for i in range(min(len(aRule), len(self.aPreviousRule))):
            if aRule[i] != self.aPreviousRule[i]:
                break
            nCommonPrefix += 1
        # Check the lUncheckedNodes for redundant nodes, proceeding from last
        # one down to the common prefix size. Then truncate the list at that point.
        self._minimize(nCommonPrefix)
        # add the suffix, starting from the correct node mid-way through the graph
        if len(self.lUncheckedNodes) == 0:
            oNode = self.oRoot
        else:
            oNode = self.lUncheckedNodes[-1][2]
        iToken = nCommonPrefix
        for sToken in aRule[nCommonPrefix:]:
            oNextNode = Node()
            oNode.dArcs[sToken] = oNextNode
            self.lUncheckedNodes.append((oNode, sToken, oNextNode))
            if iToken == (len(aRule) - 2):
                oNode.bFinal = True
            iToken += 1
            oNode = oNextNode
        oNode.bFinal = True
        self.aPreviousRule = aRule
    def finish (self):
        "minimize unchecked nodes"
        self._minimize(0)
    def _minimize (self, downTo):
        # proceed from the leaf up to a certain point
        for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ):
            oNode, sToken, oChildNode = self.lUncheckedNodes[i]
            if oChildNode in self.lMinimizedNodes:
                # replace the child with the previously encountered one
                oNode.dArcs[sToken] = self.lMinimizedNodes[oChildNode]
            else:
                # add the state to the minimized nodes.
                self.lMinimizedNodes[oChildNode] = oChildNode
            self.lUncheckedNodes.pop()
    def countNodes (self):
        "count nodes within the whole graph"
        self.nNode = len(self.lMinimizedNodes)
    def countArcs (self):
        "count arcs within the whole graph"
        self.nArc = 0
        for oNode in self.lMinimizedNodes:
            self.nArc += len(oNode.dArcs)
    def displayInfo (self):
        "display informations about the rule graph"
        print(": {:>10,} rules,  {:>10,} nodes,  {:>10,} arcs".format(self.nRule, self.nNode, self.nArc))
    def createGraph (self):
        "create the graph as a dictionary"
        dGraph = { 0: self.oRoot.getNodeAsDict() }
        for oNode in self.lMinimizedNodes:
            sHashId = oNode.__hash__()
            if sHashId not in dGraph:
                dGraph[sHashId] = oNode.getNodeAsDict()
            else:
                print("Error. Double node… same id: ", sHashId)
                print(str(oNode.getNodeAsDict()))
        dGraph = self._rewriteKeysOfDARG(dGraph)
        self._checkRegexes(dGraph)
        return dGraph
    def _rewriteKeysOfDARG (self, dGraph):
        "keys of DARG are long numbers (hashes): this function replace these hashes with smaller numbers (to reduce storing size)"
        # create translation dictionary
        dKeyTrans = {}
        for i, nKey in enumerate(dGraph):
            dKeyTrans[nKey] = i
        # replace keys
        dNewGraph = {}
        for nKey, dVal in dGraph.items():
            dNewGraph[dKeyTrans[nKey]] = dVal
        for nKey, dVal in dGraph.items():
            for sArc, val in dVal.items():
                if type(val) is int:
                    dVal[sArc] = dKeyTrans[val]
                else:
                    for sArc, nKey in val.items():
                        val[sArc] = dKeyTrans[nKey]
        return dNewGraph
    def _checkRegexes (self, dGraph):
        "check validity of regexes"
        aRegex = set()
        for nKey, dVal in dGraph.items():
            if "<re_value>" in dVal:
                for sRegex in dVal["<re_value>"]:
                    if sRegex not in aRegex:
                        self._checkRegex(sRegex)
                        aRegex.add(sRegex)
            if "<re_morph>" in dVal:
                for sRegex in dVal["<re_morph>"]:
                    if sRegex not in aRegex:
                        self._checkRegex(sRegex)
                        aRegex.add(sRegex)
        aRegex.clear()
    def _checkRegex (self, sRegex):
        #print(sRegex)
        if "¬" in sRegex:
            sPattern, sNegPattern = sRegex.split("¬")
            try:
                if not sNegPattern:
                    print("# Warning! Empty negpattern:", sRegex)
                re.compile(sPattern)
                if sNegPattern != "*":
                    re.compile(sNegPattern)
            except:
                print("# Error. Wrong regex:", sRegex)
                exit()
        else:
            try:
                if not sRegex:
                    print("# Warning! Empty pattern:", sRegex)
                re.compile(sRegex)
            except:
                print("# Error. Wrong regex:", sRegex)
                exit()
class Node:
    """Node of the rule graph"""
    NextId = 0
    def __init__ (self):
        self.i = Node.NextId
        Node.NextId += 1
        self.bFinal = False
        self.dArcs = {}          # key: arc value; value: a node
    @classmethod
    def resetNextId (cls):
        "reset to 0 the node counter"
        cls.NextId = 0
    def __str__ (self):
        # Caution! this function is used for hashing and comparison!
        cFinal = "1"  if self.bFinal  else "0"
        l = [cFinal]
        for (key, oNode) in self.dArcs.items():
            l.append(str(key))
            l.append(str(oNode.i))
        return "_".join(l)
    def __hash__ (self):
        # Used as a key in a python dictionary.
        return self.__str__().__hash__()
    def __eq__ (self, other):
        # Used as a key in a python dictionary.
        # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
        return self.__str__() == other.__str__()
    def getNodeAsDict (self):
        "returns the node as a dictionary structure"
        dNode = {}
        dReValue = {}
        dReMorph = {}
        dRule = {}
        dLemma = {}
        dMeta = {}
        dTag = {}
        for sArc, oNode in self.dArcs.items():
            if sArc.startswith("@") and len(sArc) > 1:
                dReMorph[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("~") and len(sArc) > 1:
                dReValue[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith(">") and len(sArc) > 1:
                dLemma[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("*") and len(sArc) > 1:
                dMeta[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("/") and len(sArc) > 1:
                dTag[sArc[1:]] = oNode.__hash__()
            elif sArc.startswith("##"):
                dRule[sArc[1:]] = oNode.__hash__()
            else:
                dNode[sArc] = oNode.__hash__()
        if dReValue:
            dNode["<re_value>"] = dReValue
        if dReMorph:
            dNode["<re_morph>"] = dReMorph
        if dLemma:
            dNode["<lemmas>"] = dLemma
        if dTag:
            dNode["<tags>"] = dTag
        if dMeta:
            dNode["<meta>"] = dMeta
        if dRule:
            dNode["<rules>"] = dRule
        #if self.bFinal:
        #    dNode["<final>"] = 1
        return dNode
 | 
Modified gc_core/js/lang_core/gc_engine.js from [7ee1350cd7] to [12095116ac].
| ︙ | ︙ | |||
| 35 36 37 38 39 40 41 | // data let _sAppContext = ""; // what software is running let _dOptions = null; let _aIgnoredRules = new Set(); let _oSpellChecker = null; | < | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | 
// data
let _sAppContext = "";                                  // what software is running
let _dOptions = null;
let _aIgnoredRules = new Set();
let _oSpellChecker = null;
var gc_engine = {
    //// Informations
    lang: "${lang}",
 | 
| ︙ | ︙ | |||
| 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | 
                var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js");
                _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
            } else {
                _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
        }
        catch (e) {
            helpers.logerror(e);
        }
    },
    getSpellChecker: function () {
 | > | 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | 
                var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js");
                _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
            } else {
                _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
            }
            _sAppContext = sContext;
            _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
            _oSpellChecker.activateStorage();
        }
        catch (e) {
            helpers.logerror(e);
        }
    },
    getSpellChecker: function () {
 | 
| ︙ | ︙ | |||
| 374 375 376 377 378 379 380 | 
function displayInfo (dDA, aWord) {
    // for debugging: info of word
    if (!aWord) {
        helpers.echo("> nothing to find");
        return true;
    }
 | | > | | < < < < < < < < < < | < < < | | > | | | > | | < < < < < < < < < < < | 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 | 
function displayInfo (dDA, aWord) {
    // for debugging: info of word
    if (!aWord) {
        helpers.echo("> nothing to find");
        return true;
    }
    let lMorph = _oSpellChecker.getMorph(aWord[1]);
    if (lMorph.length === 0) {
        helpers.echo("> not in dictionary");
        return true;
    }
    if (dDA.has(aWord[0])) {
        helpers.echo("DA: " + dDA.get(aWord[0]));
    }
    helpers.echo("FSA: " + lMorph);
    return true;
}
function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) {
    // analyse a tuple (position, word), return true if sPattern in morphologies (disambiguation on)
    if (!aWord) {
        //helpers.echo("morph: noword, returns " + bNoWord);
        return bNoWord;
    }
    //helpers.echo("aWord: "+aWord.toString());
    let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _oSpellChecker.getMorph(aWord[1]);
    //helpers.echo("lMorph: "+lMorph.toString());
    if (lMorph.length === 0) {
        return false;
    }
    //helpers.echo("***");
    if (bStrict) {
        return lMorph.every(s  =>  (s.search(sPattern) !== -1));
    }
    return lMorph.some(s  =>  (s.search(sPattern) !== -1));
}
function morphex (dDA, aWord, sPattern, sNegPattern, bNoWord=false) {
    // analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)
    if (!aWord) {
        //helpers.echo("morph: noword, returns " + bNoWord);
        return bNoWord;
    }
    //helpers.echo("aWord: "+aWord.toString());
    let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _oSpellChecker.getMorph(aWord[1]);
    //helpers.echo("lMorph: "+lMorph.toString());
    if (lMorph.length === 0) {
        return false;
    }
    //helpers.echo("***");
    // check negative condition
    if (lMorph.some(s  =>  (s.search(sNegPattern) !== -1))) {
        return false;
    }
    // search sPattern
    return lMorph.some(s  =>  (s.search(sPattern) !== -1));
}
function analyse (sWord, sPattern, bStrict=true) {
    // analyse a word, return true if sPattern in morphologies (disambiguation off)
    let lMorph = _oSpellChecker.getMorph(sWord);
    if (lMorph.length === 0) {
        return false;
    }
    if (bStrict) {
        return lMorph.every(s  =>  (s.search(sPattern) !== -1));
    }
    return lMorph.some(s  =>  (s.search(sPattern) !== -1));
}
function analysex (sWord, sPattern, sNegPattern) {
    // analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)
    let lMorph = _oSpellChecker.getMorph(sWord);
    if (lMorph.length === 0) {
        return false;
    }
    // check negative condition
    if (lMorph.some(s  =>  (s.search(sNegPattern) !== -1))) {
        return false;
    }
    // search sPattern
    return lMorph.some(s  =>  (s.search(sPattern) !== -1));
}
//// functions to get text outside pattern scope
// warning: check compile_rules.py to understand how it works
 | 
| ︙ | ︙ | |||
| 563 564 565 566 567 568 569 | 
function select (dDA, nPos, sWord, sPattern, lDefault=null) {
    if (!sWord) {
        return true;
    }
    if (dDA.has(nPos)) {
        return true;
    }
 | | < < | | | | < < | | | | 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 | 
function select (dDA, nPos, sWord, sPattern, lDefault=null) {
    if (!sWord) {
        return true;
    }
    if (dDA.has(nPos)) {
        return true;
    }
    let lMorph = _oSpellChecker.getMorph(sWord);
    if (lMorph.length === 0  ||  lMorph.length === 1) {
        return true;
    }
    let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 );
    if (lSelect.length > 0) {
        if (lSelect.length != lMorph.length) {
            dDA.set(nPos, lSelect);
        }
    } else if (lDefault) {
        dDA.set(nPos, lDefaul);
    }
    return true;
}
function exclude (dDA, nPos, sWord, sPattern, lDefault=null) {
    if (!sWord) {
        return true;
    }
    if (dDA.has(nPos)) {
        return true;
    }
    let lMorph = _oSpellChecker.getMorph(sWord);
    if (lMorph.length === 0  ||  lMorph.length === 1) {
        return true;
    }
    let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 );
    if (lSelect.length > 0) {
        if (lSelect.length != lMorph.length) {
            dDA.set(nPos, lSelect);
        }
    } else if (lDefault) {
        dDA.set(nPos, lDefault);
    }
    return true;
}
 | 
| ︙ | ︙ | 
Modified gc_core/py/__init__.py from [aeadedff14] to [49f46a05ff].
| 1 2 | from .grammar_checker import * | > > > | 1 2 3 4 5 | """ Grammar checker """ from .grammar_checker import * | 
Modified gc_core/py/grammar_checker.py from [79ce1061e8] to [634e5c7c61].
| 
 | 
 | > | < > > > > > | | | > | | | > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | 
"""
Grammalecte, grammar checker
"""
import importlib
import json
from . import text
class GrammarChecker:
    "GrammarChecker: Wrapper for the grammar checker engine"
    def __init__ (self, sLangCode, sContext="Python"):
        self.sLangCode = sLangCode
        # Grammar checker engine
        self.gce = importlib.import_module("."+sLangCode, "grammalecte")
        self.gce.load(sContext)
        # Spell checker
        self.oSpellChecker = self.gce.getSpellChecker()
        # Lexicographer
        self.oLexicographer = None
        # Text formatter
        self.oTextFormatter = None
    def getGCEngine (self):
        "return the grammar checker object"
        return self.gce
    def getSpellChecker (self):
        "return the spell checker object"
        return self.oSpellChecker
    def getTextFormatter (self):
        "load and return the text formatter"
        if self.oTextFormatter is None:
            tf = importlib.import_module("."+self.sLangCode+".textformatter", "grammalecte")
            self.oTextFormatter = tf.TextFormatter()
        return self.oTextFormatter
    def getLexicographer (self):
        "load and return the lexicographer"
        if self.oLexicographer is None:
            lxg = importlib.import_module("."+self.sLangCode+".lexicographe", "grammalecte")
            self.oLexicographer = lxg.Lexicographe(self.oSpellChecker)
        return self.oLexicographer
    def displayGCOptions (self):
        "display the grammar checker options"
        self.gce.displayOptions()
    def getParagraphErrors (self, sText, dOptions=None, bContext=False, bSpellSugg=False, bDebug=False):
        "returns a tuple: (grammar errors, spelling errors)"
        aGrammErrs = self.gce.parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext)
        aSpellErrs = self.oSpellChecker.parseParagraph(sText, bSpellSugg)
        return aGrammErrs, aSpellErrs
    def generateText (self, sText, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False):
        "[todo]"
        pass
    def generateTextAsJSON (self, sText, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, bDebug=False):
        "[todo]"
        pass
    def generateParagraph (self, sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False):
        "parse text and return a readable text with underline errors"
        aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug)
        if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
            return ""
        return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)
    def generateParagraphAsJSON (self, iIndex, sText, dOptions=None, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, lLineSet=None, bDebug=False):
        "parse text and return errors as a JSON string"
        aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, bContext, bSpellSugg, bDebug)
        aGrammErrs = list(aGrammErrs)
        if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
            return ""
        if lLineSet:
            aGrammErrs, aSpellErrs = text.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
            return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
        if bReturnText:
            return json.dumps({ "iParagraph": iIndex, "sText": sText, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
        return json.dumps({ "iParagraph": iIndex, "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
 | 
Modified gc_core/py/lang_core/gc_engine.py from [72ecd7c680] to [652a920cd2].
| 
 | 
 | > | | > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | 
"""
Grammalecte
Grammar checker engine
"""
import re
import sys
import os
import traceback
#import unicodedata
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dAllGraph, dRule
try:
    # LibreOffice / OpenOffice
    from com.sun.star.linguistic2 import SingleProofreadingError
    from com.sun.star.text.TextMarkupType import PROOFREADING
    from com.sun.star.beans import PropertyValue
    #import lightproof_handler_${implname} as opt
    _bWriterError = True
except ImportError:
    _bWriterError = False
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
            "load", "parse", "getSpellChecker", \
            "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
            "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
 | 
| ︙ | ︙ | |||
| 29 30 31 32 33 34 35 | 
author = "${author}"
_rules = None                               # module gc_rules
# data
_sAppContext = ""                           # what software is running
_dOptions = None
 | < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < < < < < < > > > > < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | > | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < < < < < | | | > | > | | | < < < < < < < | < < | | | | | > | < | | | | | < | | | | | > | | | | < < < < < < < < > > > | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 | 
author = "${author}"
_rules = None                               # module gc_rules
# data
_sAppContext = ""                           # what software is running
_dOptions = None
_oSpellChecker = None
_oTokenizer = None
_aIgnoredRules = set()
#### Initialization
def load (sContext="Python"):
    "initialization of the grammar checker"
    global _oSpellChecker
    global _sAppContext
    global _dOptions
    global _oTokenizer
    try:
        _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
    except:
        traceback.print_exc()
def _getRules (bParagraph):
    try:
        if not bParagraph:
            return _rules.lSentenceRules
        return _rules.lParagraphRules
    except:
        _loadRules()
    if not bParagraph:
        return _rules.lSentenceRules
    return _rules.lParagraphRules
def _loadRules ():
    from . import gc_rules
    global _rules
    _rules = gc_rules
    # compile rules regex
    for sOption, lRuleGroup in chain(_rules.lParagraphRules, _rules.lSentenceRules):
        if sOption != "@@@@":
            for aRule in lRuleGroup:
                try:
                    aRule[0] = re.compile(aRule[0])
                except:
                    echo("Bad regular expression in # " + str(aRule[2]))
                    aRule[0] = "(?i)<Grammalecte>"
#### Parsing
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
def _getSentenceBoundaries (sText):
    iStart = _zBeginOfParagraph.match(sText).end()
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
    "analyses the paragraph sText and returns list of errors"
    #sText = unicodedata.normalize("NFC", sText)
    dErrors = {}
    sRealText = sText
    dPriority = {}  # Key = position; value = priority
    dOpt = _dOptions  if not dOptions  else dOptions
    bShowRuleId = option('idrule')
    # parse paragraph
    try:
        sNew, dErrors = _proofread(None, sText, sRealText, 0, True, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        if sNew:
            sText = sNew
    except:
        raise
    # cleanup
    if " " in sText:
        sText = sText.replace(" ", ' ') # nbsp
    if " " in sText:
        sText = sText.replace(" ", ' ') # nnbsp
    if "'" in sText:
        sText = sText.replace("'", "’")
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash
    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            try:
                oSentence = TextParser(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
            except:
                raise
    return dErrors.values() # this is a view (iterable)
def _proofread (oSentence, s, sx, nOffset, bParagraph, dErrors, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
    bParagraphChange = False
    bSentenceChange = False
    dTokenPos = oSentence.dTokenPos if oSentence else {}
    for sOption, lRuleGroup in _getRules(bParagraph):
        if sOption == "@@@@":
            # graph rules
            oSentence.dError = dErrors
            if not bParagraph and bSentenceChange:
                oSentence.update(s, bDebug)
                bSentenceChange = False
            for sGraphName, sLineId in lRuleGroup:
                if bDebug:
                    print("\n>>>> GRAPH:", sGraphName, sLineId)
                bParagraphChange, s = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
                dErrors.update(oSentence.dError)
                dTokenPos = oSentence.dTokenPos
        elif not sOption or dOptions.get(sOption, False):
            # regex rules
            for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
                if sRuleId not in _aIgnoredRules:
                    for m in zRegex.finditer(s):
                        bCondMemo = None
                        for sFuncCond, cActionType, sWhat, *eAct in lActions:
                            # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroup[, message, URL]] ]
                            try:
                                bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dTokenPos, sCountry, bCondMemo)
                                if bCondMemo:
                                    if bDebug:
                                        print("RULE:", sLineId)
                                    if cActionType == "-":
                                        # grammar error
                                        nErrorStart = nOffset + m.start(eAct[0])
                                        if nErrorStart not in dErrors or nPriority > dPriority.get(nErrorStart, -1):
                                            dErrors[nErrorStart] = _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
                                            dPriority[nErrorStart] = nPriority
                                    elif cActionType == "~":
                                        # text processor
                                        s = _rewrite(s, sWhat, eAct[0], m, bUppercase)
                                        bParagraphChange = True
                                        bSentenceChange = True
                                        if bDebug:
                                            echo("~ " + s + "  -- " + m.group(eAct[0]) + "  # " + sLineId)
                                    elif cActionType == "=":
                                        # disambiguation
                                        if not bParagraph:
                                            globals()[sWhat](s, m, dTokenPos)
                                            if bDebug:
                                                echo("= " + m.group(0) + "  # " + sLineId)
                                    elif cActionType == ">":
                                        # we do nothing, this test is just a condition to apply all following actions
                                        pass
                                    else:
                                        echo("# error: unknown action at " + sLineId)
                                elif cActionType == ">":
                                    break
                            except Exception as e:
                                raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
    if bParagraphChange:
        return (s, dErrors)
    return (False, dErrors)
def _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
    nStart = nOffset + m.start(iGroup)
    nEnd = nOffset + m.end(iGroup)
    # suggestions
    if sRepl[0:1] == "=":
        sSugg = globals()[sRepl[1:]](s, m)
        lSugg = sSugg.split("|")  if sSugg  else []
    elif sRepl == "_":
        lSugg = []
    else:
        lSugg = m.expand(sRepl).split("|")
    if bUppercase and lSugg and m.group(iGroup)[0:1].isupper():
        lSugg = list(map(str.capitalize, lSugg))
    # Message
    sMessage = globals()[sMsg[1:]](s, m)  if sMsg[0:1] == "="  else  m.expand(sMsg)
    if bShowRuleId:
        sMessage += "  # " + sLineId + " # " + sRuleId
    #
    if _bWriterError:
        xErr = SingleProofreadingError()    # uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
        xErr.nErrorStart = nStart
        xErr.nErrorLength = nEnd - nStart
        xErr.nErrorType = PROOFREADING
        xErr.aRuleIdentifier = sRuleId
        xErr.aShortComment = sMessage   # sMessage.split("|")[0]     # in context menu
        xErr.aFullComment = sMessage   # sMessage.split("|")[-1]    # in dialog
        if bShowRuleId:
            xErr.aShortComment += "  " + sLineId + " # " + sRuleId
        xErr.aSuggestions = tuple(lSugg)
        if sURL:
            xProperty = PropertyValue()
            xProperty.Name = "FullCommentURL"
            xProperty.Value = sURL
            xErr.aProperties = (xProperty,)
        else:
            xErr.aProperties = ()
        return xErr
    else:
        dErr = {}
        dErr["nStart"] = nStart
        dErr["nEnd"] = nEnd
        dErr["sLineId"] = sLineId
        dErr["sRuleId"] = sRuleId
        dErr["sType"] = sOption  if sOption  else "notype"
        dErr["sMessage"] = sMessage
        dErr["aSuggestions"] = lSugg
        dErr["URL"] = sURL  if sURL  else ""
        if bContext:
            dErr['sUnderlined'] = self.sSentence0[nStart:nEnd]
            dErr['sBefore'] = self.sSentence0[max(0,nStart-80):nStart]
            dErr['sAfter'] = self.sSentence0[nEnd:nEnd+80]
        return dErr
def _rewrite (sSentence, sRepl, iGroup, m, bUppercase):
    "text processor: write <sRepl> in <sSentence> at <iGroup> position"
    nLen = m.end(iGroup) - m.start(iGroup)
    if sRepl == "*":
        sNew = " " * nLen
    elif sRepl == "_":
        sNew = sRepl + " " * (nLen-1)
    elif sRepl[0:1] == "=":
        sNew = globals()[sRepl[1:]](sSentence, m)
        sNew = sNew + " " * (nLen-len(sNew))
        if bUppercase and m.group(iGroup)[0:1].isupper():
            sNew = sNew.capitalize()
    else:
        sNew = m.expand(sRepl)
        sNew = sNew + " " * (nLen-len(sNew))
    return sSentence[0:m.start(iGroup)] + sNew + sSentence[m.end(iGroup):]
def ignoreRule (sRuleId):
    "disable rule <sRuleId>"
    _aIgnoredRules.add(sRuleId)
def resetIgnoreRules ():
    "clear all ignored rules"
    _aIgnoredRules.clear()
def reactivateRule (sRuleId):
    "(re)activate rule <sRuleId>"
    _aIgnoredRules.discard(sRuleId)
def listRules (sFilter=None):
    "generator: returns typle (sOption, sLineId, sRuleId)"
    if sFilter:
        try:
            zFilter = re.compile(sFilter)
        except:
            echo("# Error. List rules: wrong regex.")
            sFilter = None
    for sOption, lRuleGroup in chain(_getRules(True), _getRules(False)):
        if sOption != "@@@@":
            for _, _, sLineId, sRuleId, _, _ in lRuleGroup:
                if not sFilter or zFilter.search(sRuleId):
                    yield (sOption, sLineId, sRuleId)
def displayRules (sFilter=None):
    "display the name of rules, with the filter <sFilter>"
    echo("List of rules. Filter: << " + str(sFilter) + " >>")
    for sOption, sLineId, sRuleId in listRules(sFilter):
        echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))
def setOption (sOpt, bVal):
    "set option <sOpt> with <bVal> if it exists"
    if sOpt in _dOptions:
        _dOptions[sOpt] = bVal
def setOptions (dOpt):
    "update the dictionary of options with <dOpt>"
    for sKey, bVal in dOpt.items():
        if sKey in _dOptions:
            _dOptions[sKey] = bVal
def getOptions ():
    "return the dictionary of current options"
    return _dOptions
def getDefaultOptions ():
    "return the dictionary of default options"
    return dict(gc_options.getOptions(_sAppContext))
def getOptionsLabels (sLang):
    "return options labels"
    return gc_options.getUI(sLang)
def displayOptions (sLang):
    "display the list of grammar checking options"
    echo("List of options")
    echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0]  for k, v  in sorted(_dOptions.items()) ] ))
    echo("")
def resetOptions ():
    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))
def getSpellChecker ():
    "return the spellchecker object"
    return _oSpellChecker
def _getPath ():
    return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py")
#### common functions
def option (sOpt):
    "return True if option <sOpt> is active"
    return _dOptions.get(sOpt, False)
def displayInfo (dTokenPos, tWord):
    "for debugging: retrieve info of word"
    if not tWord:
        echo("> nothing to find")
        return True
    lMorph = _oSpellChecker.getMorph(tWord[1])
    if not lMorph:
        echo("> not in dictionary")
        return True
    print("TOKENS:", dTokenPos)
    if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]:
        echo("DA: " + str(dTokenPos[tWord[0]]["lMorph"]))
    echo("FSA: " + str(lMorph))
    return True
def morph (dTokenPos, tWord, sPattern, bStrict=True, bNoWord=False):
    "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
    if not tWord:
        return bNoWord
    lMorph = dTokenPos[tWord[0]]["lMorph"]  if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]  else _oSpellChecker.getMorph(tWord[1])
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    if bStrict:
        return bool(lMorph) and all(zPattern.search(s)  for s in lMorph)
    return any(zPattern.search(s)  for s in lMorph)
def morphex (dTokenPos, tWord, sPattern, sNegPattern, bNoWord=False):
    "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
    if not tWord:
        return bNoWord
    lMorph = dTokenPos[tWord[0]]["lMorph"]  if tWord[0] in dTokenPos and "lMorph" in dTokenPos[tWord[0]]  else _oSpellChecker.getMorph(tWord[1])
    if not lMorph:
        return False
    # check negative condition
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(s)  for s in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(s)  for s in lMorph)
def analyse (sWord, sPattern, bStrict=True):
    "analyse a word, return True if sPattern in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph:
        return False
    zPattern = re.compile(sPattern)
    if bStrict:
        return bool(lMorph) and all(zPattern.search(s)  for s in lMorph)
    return any(zPattern.search(s)  for s in lMorph)
def analysex (sWord, sPattern, sNegPattern):
    "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph:
        return False
    # check negative condition
    zNegPattern = re.compile(sNegPattern)
    if any(zNegPattern.search(s)  for s in lMorph):
        return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(s)  for s in lMorph)
## functions to get text outside pattern scope
# warning: check compile_rules.py to understand how it works
_zNextWord = re.compile(r" +(\w[\w-]*)")
_zPrevWord = re.compile(r"(\w[\w-]*) +$")
def nextword (s, iStart, n):
    "get the nth word of the input string or empty string"
    m = re.match("(?: +[\\w%-]+){" + str(n-1) + "} +([\\w%-]+)", s[iStart:])
    if not m:
        return None
    return (iStart+m.start(1), m.group(1))
 | 
| ︙ | ︙ | |||
| 510 511 512 513 514 515 516 | 
    if sNegPattern and re.search(sNegPattern, s):
        return False
    if re.search(sPattern, s):
        return True
    return False
 | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > | < | < | | < | | | < | < > > > > > > > > | < < < < < < < < | | | < | < > | > | > > | > > > > > > > > > > > > > | 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 | 
    if sNegPattern and re.search(sNegPattern, s):
        return False
    if re.search(sPattern, s):
        return True
    return False
def look_chk1 (dTokenPos, s, nOffset, sPattern, sPatternGroup1, sNegPatternGroup1=None):
    "returns True if s has pattern sPattern and m.group(1) has pattern sPatternGroup1"
    m = re.search(sPattern, s)
    if not m:
        return False
    try:
        sWord = m.group(1)
        nPos = m.start(1) + nOffset
    except:
        return False
    if sNegPatternGroup1:
        return morphex(dTokenPos, (nPos, sWord), sPatternGroup1, sNegPatternGroup1)
    return morph(dTokenPos, (nPos, sWord), sPatternGroup1, False)
#### Disambiguator
def select (dTokenPos, nPos, sWord, sPattern, lDefault=None):
    "Disambiguation: select morphologies of <sWord> matching <sPattern>"
    if not sWord:
        return True
    if nPos not in dTokenPos:
        print("Error. There should be a token at this position: ", nPos)
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dTokenPos[nPos]["lMorph"] = lSelect
    elif lDefault:
        dTokenPos[nPos]["lMorph"] = lDefault
    return True
def exclude (dTokenPos, nPos, sWord, sPattern, lDefault=None):
    "Disambiguation: exclude morphologies of <sWord> matching <sPattern>"
    if not sWord:
        return True
    if nPos not in dTokenPos:
        print("Error. There should be a token at this position: ", nPos)
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dTokenPos[nPos]["lMorph"] = lSelect
    elif lDefault:
        dTokenPos[nPos]["lMorph"] = lDefault
    return True
def define (dTokenPos, nPos, lMorph):
    "Disambiguation: set morphologies of token at <nPos> with <lMorph>"
    if nPos not in dTokenPos:
        print("Error. There should be a token at this position: ", nPos)
        return True
    dTokenPos[nPos]["lMorph"] = lMorph
    return True
#### TEXT PARSER
class TextParser:
    "Text parser"
    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        self.dTags = {}
        self.dError = {}
    def __str__ (self):
        s = "TEXT ==========\n"
        s += "sentence: " + self.sSentence0 + "\n"
        s += "now:      " + self.sSentence  + "\n"
        for dToken in self.lToken:
            s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}'
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            s += "\n"
        for nPos, dToken in self.dTokenPos.items():
            s += f"{nPos}\t{dToken}\n"
        return s
    def update (self, sSentence, bDebug=False):
        "update <sSentence> and retokenize"
        self.sSentence = sSentence
        lNewToken = list(_oTokenizer.genTokens(sSentence, True))
        for dToken in lNewToken:
            if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
        self.lToken = lNewToken
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        if bDebug:
            print("UPDATE:")
            print(self)
    def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
        dNode = dPointer["dNode"]
        iNode1 = dPointer["iNode1"]
        bTokenFound = False
        # token value
        if dToken["sValue"] in dNode:
            if bDebug:
                print("  MATCH:", dToken["sValue"])
            yield { "iNode1": iNode1, "dNode": dGraph[dNode[dToken["sValue"]]] }
            bTokenFound = True
        if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
            sValue = dToken["sValue"].lower()
            if sValue in dNode:
                if bDebug:
                    print("  MATCH:", sValue)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
                bTokenFound = True
        elif dToken["sValue"].isupper():
            sValue = dToken["sValue"].lower()
            if sValue in dNode:
                if bDebug:
                    print("  MATCH:", sValue)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
                bTokenFound = True
            sValue = dToken["sValue"].capitalize()
            if sValue in dNode:
                if bDebug:
                    print("  MATCH:", sValue)
                yield { "iNode1": iNode1, "dNode": dGraph[dNode[sValue]] }
                bTokenFound = True
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if re.search(sRegex, dToken["sValue"]):
                        if bDebug:
                            print("  MATCH: ~" + sRegex)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                        bTokenFound = True
                else:
                    # there is an anti-pattern
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                        continue
                    if not sPattern or re.search(sPattern, dToken["sValue"]):
                        if bDebug:
                            print("  MATCH: ~" + sRegex)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_value>"][sRegex]] }
                        bTokenFound = True
        # analysable tokens
        if dToken["sType"][0:4] == "WORD":
            # token lemmas
            if "<lemmas>" in dNode:
                for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                    if sLemma in dNode["<lemmas>"]:
                        if bDebug:
                            print("  MATCH: >" + sLemma)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<lemmas>"][sLemma]] }
                        bTokenFound = True
            # regex morph arcs
            if "<re_morph>" in dNode:
                for sRegex in dNode["<re_morph>"]:
                    if "¬" not in sRegex:
                        # no anti-pattern
                        lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                        if any(re.search(sRegex, sMorph)  for sMorph in lMorph):
                            if bDebug:
                                print("  MATCH: @" + sRegex)
                            yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                            bTokenFound = True
                    else:
                        # there is an anti-pattern
                        sPattern, sNegPattern = sRegex.split("¬", 1)
                        if sNegPattern == "*":
                            # all morphologies must match with <sPattern>
                            if sPattern:
                                lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                                if lMorph and all(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                    if bDebug:
                                        print("  MATCH: @" + sRegex)
                                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                                    bTokenFound = True
                        else:
                            lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                            if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in lMorph):
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                if bDebug:
                                    print("  MATCH: @" + sRegex)
                                yield { "iNode1": iNode1, "dNode": dGraph[dNode["<re_morph>"][sRegex]] }
                                bTokenFound = True
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
                    if bDebug:
                        print("  MATCH: /" + sTag)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<tags>"][sTag]] }
                    bTokenFound = True
        # meta arc (for token type)
        if "<meta>" in dNode:
            for sMeta in dNode["<meta>"]:
                # no regex here, we just search if <dNode["sType"]> exists within <sMeta>
                if sMeta == "*":
                    if bDebug:
                        print("  MATCH: *" + sMeta)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"]["*"]] }
                    bTokenFound = True
                elif "¬" in sMeta:
                    if dToken["sType"] not in sMeta:
                        if bDebug:
                            print("  MATCH: *" + sMeta)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                        bTokenFound = True
                elif dToken["sType"] == sMeta:
                    if bDebug:
                        print("  MATCH: *" + sMeta)
                    yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                    bTokenFound = True
        if "bKeep" in dPointer and not bTokenFound:
            yield dPointer
        # JUMP
        # Warning! Recurssion!
        if "<>" in dNode:
            dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True }
            yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug)
    def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        "parse tokens from the text and execute actions encountered"
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bTagAndRewrite = False
        for iToken, dToken in enumerate(self.lToken):
            if bDebug:
                print("TOKEN:", dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
            for dPointer in lPointer:
                lNextPointer.extend(self._getNextPointers(dToken, dGraph, dPointer, bDebug))
            lPointer = lNextPointer
            # check arcs of first nodes
            lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug))
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    print("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewrite(bDebug)
        if bDebug:
            print(self)
        return (bTagAndRewrite, self.sSentence)
    def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
        "execute actions found in the DARG"
        bChange = False
        for sLineId, nextNodeKey in dNode.items():
            bCondMemo = None
            for sRuleId in dGraph[nextNodeKey]:
                try:
                    if bDebug:
                        print("  TRY:", sRuleId)
                    sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                    # Suggestion    [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL ]
                    # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd, bCaseSvty ]
                    # Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                    # Sentence Tag  [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Test          [ option, condition, ">", "" ]
                    if not sOption or dOptions.get(sOption, False):
                        bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags, self.sSentence, self.sSentence0)
                        if bCondMemo:
                            if cActionType == "-":
                                # grammar error
                                iTokenStart, iTokenEnd, cStartLimit, cEndLimit, bCaseSvty, nPriority, sMessage, sURL = eAct
                                nTokenErrorStart = nTokenOffset + iTokenStart  if iTokenStart > 0  else nLastToken + iTokenStart
                                if "bImmune" not in self.lToken[nTokenErrorStart]:
                                    nTokenErrorEnd = nTokenOffset + iTokenEnd  if iTokenEnd > 0  else nLastToken + iTokenEnd
                                    nErrorStart = self.nOffsetWithinParagraph + (self.lToken[nTokenErrorStart]["nStart"] if cStartLimit == "<"  else self.lToken[nTokenErrorStart]["nEnd"])
                                    nErrorEnd = self.nOffsetWithinParagraph + (self.lToken[nTokenErrorEnd]["nEnd"] if cEndLimit == ">"  else self.lToken[nTokenErrorEnd]["nStart"])
                                    if nErrorStart not in self.dError or nPriority > dPriority.get(nErrorStart, -1):
                                        self.dError[nErrorStart] = self._createError(sWhat, nTokenOffset, nLastToken, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, bCaseSvty, sMessage, sURL, bShowRuleId, "notype", bContext)
                                        dPriority[nErrorStart] = nPriority
                                        if bDebug:
                                            print("  NEW_ERROR:", self.dError[nErrorStart], "\n  ", dRule[sRuleId])
                            elif cActionType == "~":
                                # text processor
                                if bDebug:
                                    print("  TAG_PREPARE:\n  ", dRule[sRuleId])
                                nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenStart, nTokenEnd, nTokenOffset, nLastToken, eAct[2], bDebug)
                                bChange = True
                            elif cActionType == "=":
                                # disambiguation
                                if bDebug:
                                    print("  DISAMBIGUATOR:\n  ", dRule[sRuleId])
                                globals()[sWhat](self.lToken, nTokenOffset)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print("  COND_OK")
                                pass
                            elif cActionType == "/":
                                if bDebug:
                                    print("  SEMANTIC_TAG:\n  ", dRule[sRuleId])
                                nTokenStart = nTokenOffset + eAct[0]
                                nTokenEnd = nTokenOffset + (eAct[1]  if eAct[1]  else eAct[0])
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "tags" in self.lToken[i]:
                                        self.lToken[i]["tags"].update(sWhat.split("|"))
                                    else:
                                        self.lToken[i]["tags"] = set(sWhat.split("|"))
                            elif cActionType == "%":
                                # sentence tags
                                if bDebug:
                                    print("  SENTENCE_TAG:\n  ", dRule[sRuleId])
                                nTokenTag = nTokenOffset + eAct[0]
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = (nTokenTag, nTokenTag)
                                elif nTokenTag > self.dTags[sWhat][1]:
                                    self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
                            else:
                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print("  COND_BREAK")
                            break
                except Exception as e:
                    raise Exception(str(e), sLineId, sRuleId, self.sSentence)
        return bChange
    def _createError (self, sSugg, nTokenOffset, nLastToken, iFirstToken, nStart, nEnd, sLineId, sRuleId, bCaseSvty, sMsg, sURL, bShowRuleId, sOption, bContext):
        # suggestions
        if sSugg[0:1] == "=":
            sSugg = globals()[sSugg[1:]](self.lToken, nTokenOffset, nLastToken)
            lSugg = sSugg.split("|")  if sSugg  else []
        elif sSugg == "_":
            lSugg = []
        else:
            lSugg = self._expand(sSugg, nTokenOffset, nLastToken).split("|")
        if bCaseSvty and lSugg and self.lToken[iFirstToken]["sValue"][0:1].isupper():
            lSugg = list(map(lambda s: s[0:1].upper()+s[1:], lSugg))
        # Message
        sMessage = globals()[sMsg[1:]](self.lToken, nTokenOffset)  if sMsg[0:1] == "="  else self._expand(sMsg, nTokenOffset, nLastToken)
        if bShowRuleId:
            sMessage += "  " + sLineId + " # " + sRuleId
        #
        if _bWriterError:
            xErr = SingleProofreadingError()    # uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
            xErr.nErrorStart = nStart
            xErr.nErrorLength = nEnd - nStart
            xErr.nErrorType = PROOFREADING
            xErr.aRuleIdentifier = sRuleId
            xErr.aShortComment = sMessage   # sMessage.split("|")[0]     # in context menu
            xErr.aFullComment = sMessage   # sMessage.split("|")[-1]    # in dialog
            if bShowRuleId:
                xErr.aShortComment += "  " + sLineId + " # " + sRuleId
            xErr.aSuggestions = tuple(lSugg)
            if sURL:
                xProperty = PropertyValue()
                xProperty.Name = "FullCommentURL"
                xProperty.Value = sURL
                xErr.aProperties = (xProperty,)
            else:
                xErr.aProperties = ()
            return xErr
        else:
            dErr = {}
            dErr["nStart"] = nStart
            dErr["nEnd"] = nEnd
            dErr["sLineId"] = sLineId
            dErr["sRuleId"] = sRuleId
            dErr["sType"] = sOption  if sOption  else "notype"
            dErr["sMessage"] = sMessage
            dErr["aSuggestions"] = lSugg
            dErr["URL"] = sURL  if sURL  else ""
            if bContext:
                dErr['sUnderlined'] = self.sSentence0[nStart:nEnd]
                dErr['sBefore'] = self.sSentence0[max(0,nStart-80):nStart]
                dErr['sAfter'] = self.sSentence0[nEnd:nEnd+80]
            return dErr
    def _expand (self, sText, nTokenOffset, nLastToken):
        #print("*", sText)
        for m in re.finditer(r"\\(-?[0-9]+)", sText):
            if m.group(1)[0:1] == "-":
                sText = sText.replace(m.group(0), self.lToken[nLastToken+int(m.group(1))+1]["sValue"])
            else:
                sText = sText.replace(m.group(0), self.lToken[nTokenOffset+int(m.group(1))]["sValue"])
        #print(">", sText)
        return sText
    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, nLastToken, bCaseSvty, bDebug):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if bDebug:
            print("   START:", nTokenRewriteStart, "END:", nTokenRewriteEnd)
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bToRemove"] = True
        elif sWhat == "␣":
            # merge tokens
            self.lToken[nTokenRewriteStart]["nMergeUntil"] = nTokenRewriteEnd
        elif sWhat == "!":
            # immunity
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bImmune"] = True
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["bImmune"] = True
        elif sWhat == "_":
            # neutralized token
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["sNewValue"] = "_"
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["sNewValue"] = "_"
        else:
            if sWhat.startswith("="):
                sWhat = globals()[sWhat[1:]](self.lToken, nTokenOffset)
            else:
                sWhat = self._expand(sWhat, nTokenOffset, nLastToken)
            bUppercase = bCaseSvty and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper()
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                # one token
                sWhat = sWhat + " " * (len(self.lToken[nTokenRewriteStart]["sValue"])-len(sWhat))
                if bUppercase:
                    sWhat = sWhat[0:1].upper() + sWhat[1:]
                self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat
            else:
                # several tokens
                lTokenValue = sWhat.split("|")
                if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1):
                    print("Error. Text processor: number of replacements != number of tokens.")
                    return
                for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue):
                    if not sValue or sValue == "*":
                        self.lToken[i]["bToRemove"] = True
                    else:
                        if bUppercase:
                            sValue = sValue[0:1].upper() + sValue[1:]
                        self.lToken[i]["sNewValue"] = sValue
    def rewrite (self, bDebug=False):
        "rewrite the sentence, modify tokens, purge the token list"
        if bDebug:
            print("REWRITE")
        lNewToken = []
        nMergeUntil = 0
        dTokenMerger = None
        for iToken, dToken in enumerate(self.lToken):
            bKeepToken = True
            if dToken["sType"] != "INFO":
                if "bImmune" in dToken:
                    nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
                    if nErrorStart in self.dError:
                        if bDebug:
                            print("immunity -> error removed:", self.dError[nErrorStart])
                        del self.dError[nErrorStart]
                if nMergeUntil and iToken <= nMergeUntil:
                    dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
                    dTokenMerger["nEnd"] = dToken["nEnd"]
                    if bDebug:
                        print("  MERGED TOKEN:", dTokenMerger["sValue"])
                    bKeepToken = False
                if "nMergeUntil" in dToken:
                    if iToken > nMergeUntil: # this token is not already merged with a previous token
                        dTokenMerger = dToken
                    if dToken["nMergeUntil"] > nMergeUntil:
                        nMergeUntil = dToken["nMergeUntil"]
                    del dToken["nMergeUntil"]
                elif "bToRemove" in dToken:
                    if bDebug:
                        print("  REMOVED:", dToken["sValue"])
                    self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]
                    bKeepToken = False
            #
            if bKeepToken:
                lNewToken.append(dToken)
                if "sNewValue" in dToken:
                    # rewrite token and sentence
                    if bDebug:
                        print(dToken["sValue"], "->", dToken["sNewValue"])
                    dToken["sRealValue"] = dToken["sValue"]
                    dToken["sValue"] = dToken["sNewValue"]
                    nDiffLen = len(dToken["sRealValue"]) - len(dToken["sNewValue"])
                    sNewRepl = (dToken["sNewValue"] + " " * nDiffLen)  if nDiffLen >= 0  else dToken["sNewValue"][:len(dToken["sRealValue"])]
                    self.sSentence = self.sSentence[:dToken["nStart"]] + sNewRepl + self.sSentence[dToken["nEnd"]:]
                    del dToken["sNewValue"]
            else:
                try:
                    del self.dTokenPos[dToken["nStart"]]
                except:
                    print(self)
                    print(dToken)
                    exit()
        if bDebug:
            print("  TEXT REWRITED:", self.sSentence)
        self.lToken.clear()
        self.lToken = lNewToken
#### Analyse tokens
def g_value (dToken, sValues, nLeft=None, nRight=None):
    "test if <dToken['sValue']> is in sValues (each value should be separated with |)"
    sValue = "|"+dToken["sValue"]+"|"  if nLeft is None  else "|"+dToken["sValue"][slice(nLeft, nRight)]+"|"
    if sValue in sValues:
        return True
    if dToken["sValue"][0:2].istitle(): # we test only 2 first chars, to make valid words such as "Laissez-les", "Passe-partout".
        if sValue.lower() in sValues:
            return True
    elif dToken["sValue"].isupper():
        #if sValue.lower() in sValues:
        #    return True
        sValue = "|"+sValue[1:].capitalize()
        if sValue.capitalize() in sValues:
            return True
    return False
def g_morph (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
    if "lMorph" in dToken:
        lMorph = dToken["lMorph"]
    else:
        if nLeft is not None:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
            if bMemorizeMorph:
                dToken["lMorph"] = lMorph
        else:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)
            return all(zPattern.search(sMorph)  for sMorph in lMorph)
        else:
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)
def g_analyse (dToken, sPattern, sNegPattern="", nLeft=None, nRight=None, bMemorizeMorph=True):
    "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
    if nLeft is not None:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
        if bMemorizeMorph:
            dToken["lMorph"] = lMorph
    else:
        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)
            return all(zPattern.search(sMorph)  for sMorph in lMorph)
        else:
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)
def g_merged_analyse (dToken1, dToken2, cMerger, sPattern, sNegPattern="", bSetMorph=True):
    "merge two token values, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
    lMorph = _oSpellChecker.getMorph(dToken1["sValue"] + cMerger + dToken2["sValue"])
    if not lMorph:
        return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)
            bResult = all(zPattern.search(sMorph)  for sMorph in lMorph)
            if bResult and bSetMorph:
                dToken1["lMorph"] = lMorph
            return bResult
        else:
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    bResult = any(zPattern.search(sMorph)  for sMorph in lMorph)
    if bResult and bSetMorph:
        dToken1["lMorph"] = lMorph
    return bResult
def g_tag_before (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["i"] > dTags[sTag][0]:
        return True
    return False
def g_tag_after (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["i"] < dTags[sTag][1]:
        return True
    return False
def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
    nSpace = dToken2["nStart"] - dToken1["nEnd"]
    if nSpace < nMin:
        return False
    if nMax is not None and nSpace > nMax:
        return False
    return True
def g_token (lToken, i):
    if i < 0:
        return lToken[0]
    if i >= len(lToken):
        return lToken[-1]
    return lToken[i]
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        if lDefault:
            dToken["lMorph"] = lDefault
            #print("DA:", dToken["sValue"], dToken["lMorph"])
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dToken["lMorph"] = lSelect
    elif lDefault:
        dToken["lMorph"] = lDefault
    #print("DA:", dToken["sValue"], dToken["lMorph"])
    return True
def g_exclude (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        if lDefault:
            dToken["lMorph"] = lDefault
            #print("DA:", dToken["sValue"], dToken["lMorph"])
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
    if lSelect:
        if len(lSelect) != len(lMorph):
            dToken["lMorph"] = lSelect
    elif lDefault:
        dToken["lMorph"] = lDefault
    #print("DA:", dToken["sValue"], dToken["lMorph"])
    return True
def g_define (dToken, lMorph):
    "set morphologies of <dToken>, always return True"
    dToken["lMorph"] = lMorph
    #print("DA:", dToken["sValue"], lMorph)
    return True
def g_define_from (dToken, nLeft=None, nRight=None):
    if nLeft is not None:
        dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
    else:
        dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"])
    return True
#### GRAMMAR CHECKER PLUGINS
${plugins}
#### CALLABLES FOR REGEX RULES (generated code)
${callables}
#### CALLABLES FOR GRAPH RULES (generated code)
${graph_callables}
 | 
Modified gc_core/py/lang_core/gc_options.py from [871c8d4b8f] to [c84731594a].
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | 
# generated code, do not edit
def getUI (sLang):
    if sLang in _dOptLabel:
        return _dOptLabel[sLang]
    return _dOptLabel["fr"]
def getOptions (sContext="Python"):
    if sContext in dOpt:
        return dOpt[sContext]
    return dOpt["Python"]
lStructOpt = ${lStructOpt}
 | > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | 
"""
Grammar checker default options
"""
# generated code, do not edit
def getUI (sLang):
    "returns dictionary of UI labels"
    if sLang in _dOptLabel:
        return _dOptLabel[sLang]
    return _dOptLabel["fr"]
def getOptions (sContext="Python"):
    "returns dictionary of options"
    if sContext in dOpt:
        return dOpt[sContext]
    return dOpt["Python"]
lStructOpt = ${lStructOpt}
 | 
| ︙ | ︙ | 
Modified gc_core/py/lang_core/gc_rules.py from [3cf95f4a21] to [2ef08593b5].
| 1 2 3 4 5 | 
# generated code, do not edit
lParagraphRules = ${paragraph_rules}
lSentenceRules = ${sentence_rules}
 | > > > > | 1 2 3 4 5 6 7 8 9 | 
"""
Grammar checker regex rules
"""
# generated code, do not edit
lParagraphRules = ${paragraph_rules}
lSentenceRules = ${sentence_rules}
 | 
Added gc_core/py/lang_core/gc_rules_graph.py version [373592f3fb].
| > > > > > > > > > | 1 2 3 4 5 6 7 8 9 | 
"""
Grammar checker graph rules
"""
# generated code, do not edit
dAllGraph = ${rules_graphs}
dRule = ${rules_actions}
 | 
Modified gc_core/py/text.py from [133d154e72] to [137c7cc30f].
| 1 2 3 4 5 6 7 8 | 
#!python3
import textwrap
from itertools import chain
def getParagraph (sText):
    "generator: returns paragraphs of text"
 | > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 | 
#!python3
"""
Text tools
"""
import textwrap
from itertools import chain
def getParagraph (sText):
    "generator: returns paragraphs of text"
 | 
| ︙ | ︙ | |||
| 39 40 41 42 43 44 45 | 
        return ""
    lGrammErrs = sorted(aGrammErrs, key=lambda d: d["nStart"])
    lSpellErrs = sorted(aSpellErrs, key=lambda d: d['nStart'])
    sText = ""
    nOffset = 0
    for sLine in wrap(sParagraph, nWidth): # textwrap.wrap(sParagraph, nWidth, drop_whitespace=False)
        sText += sLine + "\n"
 | | | | | | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | 
        return ""
    lGrammErrs = sorted(aGrammErrs, key=lambda d: d["nStart"])
    lSpellErrs = sorted(aSpellErrs, key=lambda d: d['nStart'])
    sText = ""
    nOffset = 0
    for sLine in wrap(sParagraph, nWidth): # textwrap.wrap(sParagraph, nWidth, drop_whitespace=False)
        sText += sLine + "\n"
        nLineLen = len(sLine)
        sErrLine = ""
        nLenErrLine = 0
        nGrammErr = 0
        nSpellErr = 0
        for dErr in lGrammErrs:
            nStart = dErr["nStart"] - nOffset
            if nStart < nLineLen:
                nGrammErr += 1
                if nStart >= nLenErrLine:
                    sErrLine += " " * (nStart - nLenErrLine) + "^" * (dErr["nEnd"] - dErr["nStart"])
                    nLenErrLine = len(sErrLine)
            else:
                break
        for dErr in lSpellErrs:
            nStart = dErr['nStart'] - nOffset
            if nStart < nLineLen:
                nSpellErr += 1
                nEnd = dErr['nEnd'] - nOffset
                if nEnd > len(sErrLine):
                    sErrLine += " " * (nEnd - len(sErrLine))
                sErrLine = sErrLine[:nStart] + "°" * (nEnd - nStart) + sErrLine[nEnd:]
            else:
                break
        if sErrLine:
            sText += sErrLine + "\n"
        if nGrammErr:
            sText += getReadableErrors(lGrammErrs[:nGrammErr], nWidth)
            del lGrammErrs[0:nGrammErr]
        if nSpellErr:
            sText += getReadableErrors(lSpellErrs[:nSpellErr], nWidth, True)
            del lSpellErrs[0:nSpellErr]
        nOffset += nLineLen
    return sText
def getReadableErrors (lErrs, nWidth, bSpell=False):
    "Returns lErrs errors as readable errors"
    sErrors = ""
    for dErr in lErrs:
 | 
| ︙ | ︙ | |||
| 93 94 95 96 97 98 99 | 
    return sErrors
def getReadableError (dErr, bSpell=False):
    "Returns an error dErr as a readable error"
    try:
        if bSpell:
 | | | | | | | | 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | 
    return sErrors
def getReadableError (dErr, bSpell=False):
    "Returns an error dErr as a readable error"
    try:
        if bSpell:
            sText = u"* {nStart}:{nEnd}  # {sValue}:".format(**dErr)
        else:
            sText = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
            sText += "  " + dErr.get("sMessage", "# error : message not found")
        if dErr.get("aSuggestions", None):
            sText += "\n  > Suggestions : " + " | ".join(dErr.get("aSuggestions", "# error : suggestions not found"))
        if dErr.get("URL", None):
            sText += "\n  > URL: " + dErr["URL"]
        return sText
    except KeyError:
        return u"* Non-compliant error: {}".format(dErr)
def createParagraphWithLines (lLine):
    "Returns a text as merged lines and a set of data about lines (line_number_y, start_x, end_x)"
    sText = ""
 | 
| ︙ | ︙ | 
Added gc_lang/fr/French_language.txt version [0f13f2288a].
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 | 
# NOTES SUR LA LANGUE FRANÇAISE
## CE QUI ENTOURE UN VERBE
    PRONOMS (avant)
        COD         COI
        le / l’
        la / l’
        les
        en
        me / m’     me / m’
        te / t’     te / t’
        se / s’     lui
        nous        nous
        vous        nous
        se / s’     leur
                    y
    SOMME
        [le|la|l’|les|en|me|m’|te|t’|se|s’|nous|vous|lui|leur|y]
    ADVERBE DE NÉGATION (avant)
        ne / n’
    COMBINAISONS VALIDES
        ?[ne|n’]¿   [me|te|se]      [le|la|l’|les]
        ?[ne|n’]¿   [m’|t’|s’]      [le|la|l’|les|en|y]
        ?[ne|n’]¿   [le|la]         [lui|leur]
        ?[ne|n’]¿   [l’|les]        [lui|leur|en|y]
        ?[ne|n’]¿   [lui|leur]      en
        ?[ne|n’]¿   [nous|vous]     [le|la|l’|les|en|y]
        ne          [le|la|l’|les|me|m’|te|t’|se|s’|nous|vous|lui|leur]
        n’          [en|y]
    RÉSUMÉ & SIMPLIFICATION
        [ne|n’|le|la|l’|les|en|me|m’|te|t’|se|s’|nous|vous|lui|leur|y]
        ?[ne|n’]¿   [le|la|l’|les|en|me|m’|te|t’|se|s’|nous|vous|lui|leur|y]
        ?[ne|n’]¿   [me|m’|te|t’|se|s’|nous|vous]   [le|la|l’|les|en|y]
        ?[ne|n’]¿   [le|la|l’|les]                  [lui|leur|en|y]
        ?[ne|n’]¿   [lui|leur]                      en
    ADVERBE DE NÉGATION (après)
        guère
        jamais
        pas
        plus
        point
        que / qu’
        rien
    PRONOMS À L’IMPÉRATIF
        APRÈS
            -moi
            -toi
            -lui
            -leur
            -nous
            -vous
            -le
            -la
            -les
            -en
            -y
        AVANT
            Uniquement les combinaisons avec l’adverbe de négation [ne|n’]
## DÉTERMINANTS
    SINGULIER               PLURIEL
    le / la / l’            les
    ledit / ladite          lesdits / lesdites
    un / une                des
    du / de la              des
    dudit / de ladite       desdits / desdites
    de                      de
    ce / cet / cette        ces
    icelui / icelle         iceux / icelles
    mon / ma                mes
    ton / ta                tes
    son / sa                ses
    votre                   nos
    notre                   vos
    leur                    leurs
    quel / quelle           quels / quelles
    quelque                 quelques
    tout / toute            tous / toutes
    chaque
    aucun / aucune
    nul / nulle
                            plusieurs
                            certains / certaines
                            divers / diverses
    DÉTERMINANT & PRÉPOSITION
    au / à la               aux
    audit / à ladite        auxdits / auxdites
## CONJONCTIONS
    DE COORDINATION         DE SUBORDINATION
    c’est-à-dire            afin que            pendant que
    c.-à-d.                 après que           pour que
    car                     attendu que         pourvu que
    donc                    avant que           puisque
    et / &                  bien que            quand
    mais                    comme               que
    ni                      depuis que          quoique
    or                      dès que             sans que
    ou                      dès lors que        sauf que
    partant                 excepté que         selon que
    puis                    lorsque             si
    sinon                   lors que            tandis que
    soit                    malgré que          tant que
                            parce que
## PRÉPOSITIONS
    VERBALES UNIQUEMENT
        afin de
    NOMINALES ET VERBALES
        à
        entre
        excepté
        outre
        par
        pour
        sans
        sauf
    PRÉPOSITIONS ET DÉTERMINANTS
        au
        aux
        audit
        auxdits
        auxdites
    NOMINALES
        à l’instar de               devers                      par-dessus  (adv)
        à mi-distance de            dixit                       par-devant  (adv)
        après                       durant                      par-devers
        attendu                     dès                         parmi
        au-dedans   (adv)           en                          passé
        au-dehors   (adv)           endéans                     pendant
        au-delà     (adv)           envers                      pour
        au-dessous  (adv)           ès                          quant à/au/à la/aux
        au-dessus   (adv)           excepté                     revoici
        au-devant   (adv)           face à                      revoilà
        auprès de                   fors                        sauf
        autour de                   grâce à                     sans
        av                          hormis                      selon
        avant                       hors                        sous
        avec                        jusque                      suivant
        chez                        jusques                     sur
        concernant                  lez                         tandis      (adv)
        contre                      lors de                     vers
        courant (+mois)             lès                         versus
        dans                        malgré                      via
        depuis                      moins       (adv)           vis-à-vis
        derrière                    nonobstant  (adv)           voici
        dessous     (adv)           par-delà                    voilà
        dessus      (adv)           par-derrière  (adv)         vs
        devant      (adv)           par-dessous   (adv)         vu
## PRONOMS
    PRONOMS PERSONNELS SUJETS
    je                  moi-même                                mézigue
    tu                  toi-même                                tézigue
    il / elle           lui / lui-même / elle-même              césigue / sézigue
    on
    nous                nous-même / nous-mêmes                  noszigues
    vous                vous-même / vous-mêmes                  voszigues
    ils / elles         eux / eux-mêmes / elles-mêmes           leurszigues
    PRONOMS PERSONNELS OBJETS
    moi                 moi-même                                mézigue
    toi                 toi-même                                tézigue
    lui / elle          lui-même  / elle-même                   césigue / sézigue
    soi                 soi-même
    nous                nous-même / nous-mêmes                  noszigues
    vous                vous-même / vous-mêmes                  voszigues
    eux / elles         eux / eux-mêmes / elles-mêmes           leurszigues
    PRONOMS NÉGATIFS (SUJETS & OBJETS)
    aucun
    aucune
    dégun
    nul
    personne
    rien
    PRONOMS OBJETS PRÉVERBES
    la      COD
    le      COD
    les     COD
    l’      COD
    leur    COI
    lui     COI
    me      COD/COI
    te      COD/COI
    se      COD/COI
    nous    COD/COI
    vous    COD/COI
    y       COI (proadv)
    en      COD (proadv)
    PRONOMS DÉMONSTRATIFS (SUJETS ET OBJETS)
    çuilà           propersuj properobj 3pe mas sg
    ça              prodem mas sg
    ceci            prodem mas sg
    cela            prodem mas sg
    celle qui       prodem fem sg
    celles qui      prodem fem pl
    celle-ci        prodem fem sg
    celle-là        prodem fem sg
    celles-ci       prodem fem pl
    celles-là       prodem fem pl
    celui qui       prodem mas sg
    celui-ci        prodem mas sg
    celui-là        prodem mas sg
    ceux qui        prodem mas pl
    ceux-ci         prodem mas pl
    ceux-là         prodem mas pl
    icelle          detdem prodem fem sg
    icelles         detdem prodem fem pl
    icelui          detdem prodem mas sg
    iceux           detdem prodem mas pl
    PRONOMS DÉMONSTRATIFS (SUJETS)
    ce
    PRONOMS DÉMONSTRATIFS (OBJETS)
    ci              (adv)
    PRONOMS RELATIFS
    auquel          proint prorel mas sg
    auxquelles      proint prorel fem pl
    auxquels        proint prorel mas pl
    desquelles      proint prorel fem pl
    desquels        proint prorel mas pl
    dont            prorel
    duquel          proint prorel mas sg
    laquelle        proint prorel fem sg
    lequel          proint prorel mas sg
    lesquelles      proint prorel fem pl
    lesquels        proint prorel mas pl
    où              advint prorel
    qué             proint prorel
    qui             proint prorel
    que             proint prorel
    quid            proint
    quoi            proint prorel
    autre           proind
    autrui          proind
    quiconque       proind prorel
    certaine        detind proind
    chacun          proind mas sg
    chacune         proind fem sg
    d'aucuns        proind mas pl
    grand-chose     proind
    n'importe quoi  proind
    n'importe qui   proind
    plupart         proind epi pl
    quelques-unes   proind fem pl
    quelques-uns    proind mas pl
    quelqu'un       proind mas sg
    quelqu'une      proind fem sg
    telle           proind
 | 
Modified gc_lang/fr/config.ini from [7c7adf7950] to [b6aa293b9a].
| 1 2 3 | [args] lang = fr lang_name = French | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | [args] lang = fr lang_name = French locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_BF fr_BJ fr_CD fr_CI fr_CM fr_MA fr_ML fr_MU fr_NE fr_RE fr_SN fr_TG country_default = FR name = Grammalecte implname = grammalecte # always use 3 numbers for version: x.y.z version = 0.7 author = Olivier R. provider = Dicollecte link = http://grammalecte.net description = Correcteur grammatical pour le français. extras = README_fr.txt logo = logo.png | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules-js/conj.js from [f544af05b0] to [8124143953].
| ︙ | ︙ | |||
| 83 84 85 86 87 88 89 | 
        return this._lVtyp[this._dVerb[sVerb][0]];
    },
    getSimil: function (sWord, sMorph, bSubst=false) {
        if (!sMorph.includes(":V")) {
            return new Set();
        }
 | | | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | 
        return this._lVtyp[this._dVerb[sVerb][0]];
    },
    getSimil: function (sWord, sMorph, bSubst=false) {
        if (!sMorph.includes(":V")) {
            return new Set();
        }
        let sInfi = sMorph.slice(1, sMorph.indexOf("/"));
        let aSugg = new Set();
        let tTags = this._getTags(sInfi);
        if (tTags) {
            if (!bSubst) {
                // we suggest conjugated forms
                if (sMorph.includes(":V1")) {
                    aSugg.add(sInfi);
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules-js/gce_analyseur.js from [e2613ddcd2] to [62ebd0a2ec].
| ︙ | ︙ | |||
| 18 19 20 21 22 23 24 | 
    if (s2 == "vous") {
        return "vous";
    }
    if (s2 == "eux") {
        return "ils";
    }
    if (s2 == "elle" || s2 == "elles") {
 | < | < | | > | | < | | | | < | | | | < | | | | | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | 
    if (s2 == "vous") {
        return "vous";
    }
    if (s2 == "eux") {
        return "ils";
    }
    if (s2 == "elle" || s2 == "elles") {
        if (cregex.mbNprMasNotFem(_oSpellChecker.getMorph(s1))) {
            return "ils";
        }
        // si épicène, indéterminable, mais OSEF, le féminin l’emporte
        return "elles";
    }
    return s1 + " et " + s2;
}
function apposition (sWord1, sWord2) {
    // returns true if nom + nom (no agreement required)
    return sWord2.length < 2 || (cregex.mbNomNotAdj(_oSpellChecker.getMorph(sWord2)) && cregex.mbPpasNomNotAdj(_oSpellChecker.getMorph(sWord1)));
}
function isAmbiguousNAV (sWord) {
    // words which are nom|adj and verb are ambiguous (except être and avoir)
    let lMorph = _oSpellChecker.getMorph(sWord);
    if (lMorph.length === 0) {
        return false;
    }
    if (!cregex.mbNomAdj(lMorph) || sWord == "est") {
        return false;
    }
    if (cregex.mbVconj(lMorph) && !cregex.mbMG(lMorph)) {
        return true;
    }
    return false;
}
function isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj) {
    //// use it if sWord1 won’t be a verb; word2 is assumed to be true via isAmbiguousNAV
    let a2 = _oSpellChecker.getMorph(sWord2);
    if (a2.length === 0) {
        return false;
    }
    if (cregex.checkConjVerb(a2, sReqMorphConj)) {
        // verb word2 is ok
        return false;
    }
    let a1 = _oSpellChecker.getMorph(sWord1);
    if (a1.length === 0) {
        return false;
    }
    if (cregex.checkAgreement(a1, a2) && (cregex.mbAdj(a2) || cregex.mbAdj(a1))) {
        return false;
    }
    return true;
}
function isVeryAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj, bLastHopeCond) {
    //// use it if sWord1 can be also a verb; word2 is assumed to be true via isAmbiguousNAV
    let a2 = _oSpellChecker.getMorph(sWord2);
    if (a2.length === 0) {
        return false;
    }
    if (cregex.checkConjVerb(a2, sReqMorphConj)) {
        // verb word2 is ok
        return false;
    }
    let a1 = _oSpellChecker.getMorph(sWord1);
    if (a1.length === 0) {
        return false;
    }
    if (cregex.checkAgreement(a1, a2) && (cregex.mbAdj(a2) || cregex.mbAdjNb(a1))) {
        return false;
    }
    // now, we know there no agreement, and conjugation is also wrong
    if (cregex.isNomAdj(a1)) {
        return true;
    }
    //if cregex.isNomAdjVerb(a1): # considered true
    if (bLastHopeCond) {
        return true;
    }
    return false;
}
function checkAgreement (sWord1, sWord2) {
    let a2 = _oSpellChecker.getMorph(sWord2);
    if (a2.length === 0) {
        return true;
    }
    let a1 = _oSpellChecker.getMorph(sWord1);
    if (a1.length === 0) {
        return true;
    }
    return cregex.checkAgreement(a1, a2);
}
function mbUnit (s) {
    if (/[µ\/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]/.test(s)) {
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules-js/gce_suggestions.js from [0c31bc1a27] to [6803550153].
| ︙ | ︙ | |||
| 8 9 10 11 12 13 14 | 
    var phonet = require("resource://grammalecte/fr/phonet.js");
}
//// verbs
function suggVerb (sFlex, sWho, funcSugg2=null) {
 | < | | | | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | 
    var phonet = require("resource://grammalecte/fr/phonet.js");
}
//// verbs
function suggVerb (sFlex, sWho, funcSugg2=null) {
    let aSugg = new Set();
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        let tTags = conj._getTags(sStem);
        if (tTags) {
            // we get the tense
            let aTense = new Set();
            for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
                let m;
                let zVerb = new RegExp (">"+sStem+"/.*?(:(?:Y|I[pqsf]|S[pq]|K))", "g");
                while ((m = zVerb.exec(sMorph)) !== null) {
                    // stem must be used in regex to prevent confusion between different verbs (e.g. sauras has 2 stems: savoir and saurer)
                    if (m) {
                        if (m[1] === ":Y") {
                            aTense.add(":Ip");
                            aTense.add(":Iq");
                            aTense.add(":Is");
 | 
| ︙ | ︙ | |||
| 57 58 59 60 61 62 63 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggVerbPpas (sFlex, sWhat=null) {
    let aSugg = new Set();
 | | | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggVerbPpas (sFlex, sWhat=null) {
    let aSugg = new Set();
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        let tTags = conj._getTags(sStem);
        if (tTags) {
            if (!sWhat) {
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"));
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"));
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"));
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"));
 | 
| ︙ | ︙ | |||
| 107 108 109 110 111 112 113 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggVerbTense (sFlex, sTense, sWho) {
    let aSugg = new Set();
 | | | | | 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggVerbTense (sFlex, sTense, sWho) {
    let aSugg = new Set();
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        if (conj.hasConj(sStem, sTense, sWho)) {
            aSugg.add(conj.getConj(sStem, sTense, sWho));
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggVerbImpe (sFlex) {
    let aSugg = new Set();
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        let tTags = conj._getTags(sStem);
        if (tTags) {
            if (conj._hasConjWithTags(tTags, ":E", ":2s")) {
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2s"));
            }
            if (conj._hasConjWithTags(tTags, ":E", ":1p")) {
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":1p"));
            }
            if (conj._hasConjWithTags(tTags, ":E", ":2p")) {
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2p"));
            }
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggVerbInfi (sFlex) {
    return _oSpellChecker.getLemma(sFlex).filter(sStem => conj.isVerb(sStem)).join("|");
}
const _dQuiEst = new Map ([
    ["je", ":1s"], ["j’", ":1s"], ["j’en", ":1s"], ["j’y", ":1s"],
    ["tu", ":2s"], ["il", ":3s"], ["on", ":3s"], ["elle", ":3s"],
    ["nous", ":1p"], ["vous", ":2p"], ["ils", ":3p"], ["elles", ":3p"]
 | 
| ︙ | ︙ | |||
| 172 173 174 175 176 177 178 | 
    if (!sWho) {
        if (sSuj[0].gl_isLowerCase()) { // pas un pronom, ni un nom propre
            return "";
        }
        sWho = ":3s";
    }
    let aSugg = new Set();
 | | | > | | 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | 
    if (!sWho) {
        if (sSuj[0].gl_isLowerCase()) { // pas un pronom, ni un nom propre
            return "";
        }
        sWho = ":3s";
    }
    let aSugg = new Set();
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        let tTags = conj._getTags(sStem);
        if (tTags) {
            for (let sTense of lMode) {
                if (conj._hasConjWithTags(tTags, sTense, sWho)) {
                    aSugg.add(conj._getConjWithTags(sStem, tTags, sTense, sWho));
                }
            }
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
//// Nouns and adjectives
function suggPlur (sFlex, sWordToAgree=null) {
    // returns plural forms assuming sFlex is singular
    if (sWordToAgree) {
        let lMorph = _oSpellChecker.getMorph(sWordToAgree);
        if (lMorph.length === 0) {
            return "";
        }
        let sGender = cregex.getGender(lMorph);
        if (sGender == ":m") {
            return suggMasPlur(sFlex);
        } else if (sGender == ":f") {
            return suggFemPlur(sFlex);
        }
    }
    let aSugg = new Set();
 | 
| ︙ | ︙ | |||
| 254 255 256 257 258 259 260 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggMasSing (sFlex, bSuggSimil=false) {
    // returns masculine singular forms
 | < | | 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggMasSing (sFlex, bSuggSimil=false) {
    // returns masculine singular forms
    let aSugg = new Set();
    for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
        if (!sMorph.includes(":V")) {
            // not a verb
            if (sMorph.includes(":m") || sMorph.includes(":e")) {
                aSugg.add(suggSing(sFlex));
            } else {
                let sStem = cregex.getLemmaOfMorph(sMorph);
                if (mfsp.isFemForm(sStem)) {
 | 
| ︙ | ︙ | |||
| 290 291 292 293 294 295 296 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggMasPlur (sFlex, bSuggSimil=false) {
    // returns masculine plural forms
 | < | | 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggMasPlur (sFlex, bSuggSimil=false) {
    // returns masculine plural forms
    let aSugg = new Set();
    for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
        if (!sMorph.includes(":V")) {
            // not a verb
            if (sMorph.includes(":m") || sMorph.includes(":e")) {
                aSugg.add(suggPlur(sFlex));
            } else {
                let sStem = cregex.getLemmaOfMorph(sMorph);
                if (mfsp.isFemForm(sStem)) {
 | 
| ︙ | ︙ | |||
| 331 332 333 334 335 336 337 | 
    }
    return "";
}
function suggFemSing (sFlex, bSuggSimil=false) {
    // returns feminine singular forms
 | < | | 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | 
    }
    return "";
}
function suggFemSing (sFlex, bSuggSimil=false) {
    // returns feminine singular forms
    let aSugg = new Set();
    for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
        if (!sMorph.includes(":V")) {
            // not a verb
            if (sMorph.includes(":f") || sMorph.includes(":e")) {
                aSugg.add(suggSing(sFlex));
            } else {
                let sStem = cregex.getLemmaOfMorph(sMorph);
                if (mfsp.isFemForm(sStem)) {
 | 
| ︙ | ︙ | |||
| 365 366 367 368 369 370 371 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggFemPlur (sFlex, bSuggSimil=false) {
    // returns feminine plural forms
 | < | | 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 | 
        return Array.from(aSugg).join("|");
    }
    return "";
}
function suggFemPlur (sFlex, bSuggSimil=false) {
    // returns feminine plural forms
    let aSugg = new Set();
    for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
        if (!sMorph.includes(":V")) {
            // not a verb
            if (sMorph.includes(":f") || sMorph.includes(":e")) {
                aSugg.add(suggPlur(sFlex));
            } else {
                let sStem = cregex.getLemmaOfMorph(sMorph);
                if (mfsp.isFemForm(sStem)) {
 | 
| ︙ | ︙ | |||
| 398 399 400 401 402 403 404 | 
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
function hasFemForm (sFlex) {
 | | | < | | | | | | 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 | 
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
function hasFemForm (sFlex) {
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        if (mfsp.isFemForm(sStem) || conj.hasConj(sStem, ":PQ", ":Q3")) {
            return true;
        }
    }
    if (phonet.hasSimil(sFlex, ":f")) {
        return true;
    }
    return false;
}
function hasMasForm (sFlex) {
    for (let sStem of _oSpellChecker.getLemma(sFlex)) {
        if (mfsp.isFemForm(sStem) || conj.hasConj(sStem, ":PQ", ":Q1")) {
            // what has a feminine form also has a masculine form
            return true;
        }
    }
    if (phonet.hasSimil(sFlex, ":m")) {
        return true;
    }
    return false;
}
function switchGender (sFlex, bPlur=null) {
    let aSugg = new Set();
    if (bPlur === null) {
        for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
            if (sMorph.includes(":f")) {
                if (sMorph.includes(":s")) {
                    aSugg.add(suggMasSing(sFlex));
                } else if (sMorph.includes(":p")) {
                    aSugg.add(suggMasPlur(sFlex));
                }
            } else if (sMorph.includes(":m")) {
                if (sMorph.includes(":s")) {
                    aSugg.add(suggFemSing(sFlex));
                } else if (sMorph.includes(":p")) {
                    aSugg.add(suggFemPlur(sFlex));
                } else {
                    aSugg.add(suggFemSing(sFlex));
                    aSugg.add(suggFemPlur(sFlex));
                }
            }
        }
    } else if (bPlur) {
        for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
            if (sMorph.includes(":f")) {
                aSugg.add(suggMasPlur(sFlex));
            } else if (sMorph.includes(":m")) {
                aSugg.add(suggFemPlur(sFlex));
            }
        }
    } else {
        for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
            if (sMorph.includes(":f")) {
                aSugg.add(suggMasSing(sFlex));
            } else if (sMorph.includes(":m")) {
                aSugg.add(suggFemSing(sFlex));
            }
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
function switchPlural (sFlex) {
    let aSugg = new Set();
    for (let sMorph of _oSpellChecker.getMorph(sFlex)) { 
        if (sMorph.includes(":s")) {
            aSugg.add(suggPlur(sFlex));
        } else if (sMorph.includes(":p")) {
            aSugg.add(suggSing(sFlex));
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
    return "";
}
function hasSimil (sWord, sPattern=null) {
    return phonet.hasSimil(sWord, sPattern);
}
function suggSimil (sWord, sPattern=null, bSubst=false) {
    // return list of words phonetically similar to sWord and whom POS is matching sPattern
    let aSugg = phonet.selectSimil(sWord, sPattern);
    for (let sMorph of _oSpellChecker.getMorph(sWord)) {
        for (let e of conj.getSimil(sWord, sMorph, bSubst)) {
            aSugg.add(e);
        }
    }
    if (aSugg.size > 0) {
        return Array.from(aSugg).join("|");
    }
 | 
| ︙ | ︙ | |||
| 511 512 513 514 515 516 517 | 
    if (sWord[0] == "h" || sWord[0] == "H") {
        return "ce|cet";
    }
    return "ce";
}
function suggLesLa (sWord) {
 | < | | 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 | 
    if (sWord[0] == "h" || sWord[0] == "H") {
        return "ce|cet";
    }
    return "ce";
}
function suggLesLa (sWord) {
    if (_oSpellChecker.getMorph(sWord).some(s  =>  s.includes(":p"))) {
        return "les|la";
    }
    return "la";
}
function formatNumber (s) {
    let nLen = s.length;
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules-js/lexicographe.js from [823f277d47] to [8830593e2a].
| ︙ | ︙ | |||
| 83 84 85 86 87 88 89 | 
    [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    [':C', [" conjonction,", "Conjonction"]],
    [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],
 | | | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | 
    [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    [':C', [" conjonction,", "Conjonction"]],
    [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],
    [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]],
    [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]],
    [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]],
    [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]],
    [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]],
    [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]],
 | 
| ︙ | ︙ | |||
| 187 188 189 190 191 192 193 | 
    ['en', " pronom adverbial"],
    ["m'en", " (me) pronom personnel objet + (en) pronom adverbial"],
    ["t'en", " (te) pronom personnel objet + (en) pronom adverbial"],
    ["s'en", " (se) pronom personnel objet + (en) pronom adverbial"]
]);
 | | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | 
    ['en', " pronom adverbial"],
    ["m'en", " (me) pronom personnel objet + (en) pronom adverbial"],
    ["t'en", " (te) pronom personnel objet + (en) pronom adverbial"],
    ["s'en", " (se) pronom personnel objet + (en) pronom adverbial"]
]);
const _dChar = new Map([
    ['.', "point"],
    ['·', "point médian"],
    ['…', "points de suspension"],
    [':', "deux-points"],
    [';', "point-virgule"],
    [',', "virgule"],
    ['?', "point d’interrogation"],
 | 
| ︙ | ︙ | |||
| 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 | 
    ['–', "tiret demi-cadratin"],
    ['«', "guillemet ouvrant (chevrons)"],
    ['»', "guillemet fermant (chevrons)"],
    ['“', "guillemet ouvrant double"],
    ['”', "guillemet fermant double"],
    ['‘', "guillemet ouvrant"],
    ['’', "guillemet fermant"],
    ['/', "signe de la division"],
    ['+', "signe de l’addition"],
    ['*', "signe de la multiplication"],
    ['=', "signe de l’égalité"],
    ['<', "inférieur à"],
    ['>', "supérieur à"],
]);
class Lexicographe {
    constructor (oSpellChecker, oTokenizer, oLocGraph) {
        this.oSpellChecker = oSpellChecker;
 | > > > > > | 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | 
    ['–', "tiret demi-cadratin"],
    ['«', "guillemet ouvrant (chevrons)"],
    ['»', "guillemet fermant (chevrons)"],
    ['“', "guillemet ouvrant double"],
    ['”', "guillemet fermant double"],
    ['‘', "guillemet ouvrant"],
    ['’', "guillemet fermant"],
    ['"', "guillemets droits (déconseillé en typographie)"],
    ['/', "signe de la division"],
    ['+', "signe de l’addition"],
    ['*', "signe de la multiplication"],
    ['=', "signe de l’égalité"],
    ['<', "inférieur à"],
    ['>', "supérieur à"],
    ['⩽', "inférieur ou égal à"],
    ['⩾', "supérieur ou égal à"],
    ['%', "signe de pourcentage"],
    ['‰', "signe pour mille"],
]);
class Lexicographe {
    constructor (oSpellChecker, oTokenizer, oLocGraph) {
        this.oSpellChecker = oSpellChecker;
 | 
| ︙ | ︙ | |||
| 241 242 243 244 245 246 247 248 249 250 | 
    getInfoForToken (oToken) {
        // Token: .sType, .sValue, .nStart, .nEnd
        // return a object {sType, sValue, aLabel}
        let m = null;
        try {
            switch (oToken.sType) {
                case 'SEPARATOR':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
 | > | | > > > > > > > | | 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | 
    getInfoForToken (oToken) {
        // Token: .sType, .sValue, .nStart, .nEnd
        // return a object {sType, sValue, aLabel}
        let m = null;
        try {
            switch (oToken.sType) {
                case 'SEPARATOR':
                case 'SIGN':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dChar.gl_get(oToken.sValue, "caractère indéterminé")]
                    };
                    break;
                case 'NUM':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: ["nombre"]
                    };
                    break;
                case 'LINK':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["hyperlien"]
                    };
                    break;
                case 'WORD_ELIDED':
                    let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
                    };
                    break;
                case 'WORD_ORDINAL':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: ["nombre ordinal"]
                    };
                    break;
                case 'FOLDERUNIX':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["dossier UNIX (et dérivés)"]
                    };
                    break;
                case 'FOLDERWIN':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["dossier Windows"]
                    };
                    break;
                case 'WORD_ACRONYM':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: ["Sigle ou acronyme"]
                    };
                    break;
                case 'WORD':
 | 
| ︙ | ︙ | |||
| 452 453 454 455 456 457 458 | 
        let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
        let iKey = 0;
        let aElem = [];
        do {
            let oToken = aTokenList[iKey];
            let sMorphLoc = '';
            let aTokenTempList = [oToken];
 | | | 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 | 
        let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
        let iKey = 0;
        let aElem = [];
        do {
            let oToken = aTokenList[iKey];
            let sMorphLoc = '';
            let aTokenTempList = [oToken];
            if (oToken.sType == "WORD" || oToken.sType == "WORD_ELIDED"){
                let iKeyTree = iKey + 1;
                let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()];
                while (oLocNode) {
                    let oTokenNext = aTokenList[iKeyTree];
                    iKeyTree++;
                    if (oTokenNext) {
                        oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()];
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules-js/tests_data.json from [f05e835c66] to [ef6f6c1c40].
| 
 | 
 | | | 1 | 
${regex_gctestsJS}
 | 
Modified gc_lang/fr/modules/conj.py from [c668aaf269] to [258383e97f].
| 
 | 
 | > | > > | 1 2 3 4 5 6 7 8 9 10 11 | """ Grammalecte - Conjugueur """ # License: GPL 3 import re import traceback from .conj_data import lVtyp as _lVtyp from .conj_data import lTags as _lTags | 
| ︙ | ︙ | |||
| 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | 
_dGroup = { "0": "auxiliaire", "1": "1ᵉʳ groupe", "2": "2ᵉ groupe", "3": "3ᵉ groupe" }
_dTenseIdx = { ":PQ": 0, ":Ip": 1, ":Iq": 2, ":Is": 3, ":If": 4, ":K": 5, ":Sp": 6, ":Sq": 7, ":E": 8 }
def isVerb (sVerb):
    return sVerb in _dVerb
def getConj (sVerb, sTense, sWho):
    "returns conjugation (can be an empty string)"
    if sVerb not in _dVerb:
        return None
 | > | 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | 
_dGroup = { "0": "auxiliaire", "1": "1ᵉʳ groupe", "2": "2ᵉ groupe", "3": "3ᵉ groupe" }
_dTenseIdx = { ":PQ": 0, ":Ip": 1, ":Iq": 2, ":Is": 3, ":If": 4, ":K": 5, ":Sp": 6, ":Sq": 7, ":E": 8 }
def isVerb (sVerb):
    "return True if it’s a existing verb"
    return sVerb in _dVerb
def getConj (sVerb, sTense, sWho):
    "returns conjugation (can be an empty string)"
    if sVerb not in _dVerb:
        return None
 | 
| ︙ | ︙ | |||
| 52 53 54 55 56 57 58 59 60 | 
    "returns raw informations about sVerb"
    if sVerb not in _dVerb:
        return None
    return _lVtyp[_dVerb[sVerb][0]]
def getSimil (sWord, sMorph, bSubst=False):
    if ":V" not in sMorph:
        return set()
 | > | | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | 
    "returns raw informations about sVerb"
    if sVerb not in _dVerb:
        return None
    return _lVtyp[_dVerb[sVerb][0]]
def getSimil (sWord, sMorph, bSubst=False):
    "returns a set of verbal forms similar to <sWord>, according to <sMorph>"
    if ":V" not in sMorph:
        return set()
    sInfi = sMorph[1:sMorph.find("/")]
    aSugg = set()
    tTags = _getTags(sInfi)
    if tTags:
        if not bSubst:
            # we suggest conjugated forms
            if ":V1" in sMorph:
                aSugg.add(sInfi)
 | 
| ︙ | ︙ | |||
| 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | 
            # if there is only one past participle (epi inv), unreliable.
            if len(aSugg) == 1:
                aSugg.clear()
    return aSugg
def getConjSimilInfiV1 (sInfi):
    if sInfi not in _dVerb:
        return set()
    aSugg = set()
    tTags = _getTags(sInfi)
    if tTags:
        aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":2s"))
        aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":3s"))
 | > | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | 
            # if there is only one past participle (epi inv), unreliable.
            if len(aSugg) == 1:
                aSugg.clear()
    return aSugg
def getConjSimilInfiV1 (sInfi):
    "returns verbal forms phonetically similar to infinitive form (for verb in group 1)"
    if sInfi not in _dVerb:
        return set()
    aSugg = set()
    tTags = _getTags(sInfi)
    if tTags:
        aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":2s"))
        aSugg.add(_getConjWithTags(sInfi, tTags, ":Iq", ":3s"))
 | 
| ︙ | ︙ | |||
| 138 139 140 141 142 143 144 | 
    "returns sWord modified by sSfx"
    if not sSfx:
        return ""
    if sSfx == "0":
        return sWord
    try:
        return sWord[:-(ord(sSfx[0])-48)] + sSfx[1:]  if sSfx[0] != '0'  else  sWord + sSfx[1:]  # 48 is the ASCII code for "0"
 | | | > > | | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | 
    "returns sWord modified by sSfx"
    if not sSfx:
        return ""
    if sSfx == "0":
        return sWord
    try:
        return sWord[:-(ord(sSfx[0])-48)] + sSfx[1:]  if sSfx[0] != '0'  else  sWord + sSfx[1:]  # 48 is the ASCII code for "0"
    except (IndexError, TypeError):
        return "## erreur, code : " + str(sSfx) + " ##"
class Verb ():
    "Verb and its conjugation"
    def __init__ (self, sVerb, sVerbPattern=""):
        # conjugate a unknown verb with rules from sVerbPattern
        if not isinstance(sVerb, str):
            raise TypeError("sVerb should be a string")
        if not sVerb:
            raise ValueError("Empty string.")
        if sVerbPattern == "":
            sVerbPattern = sVerb
        self.sVerb = sVerb
        self.sVerbAux = ""
        self._sRawInfo = getVtyp(sVerbPattern)
        self.sInfo = self._readableInfo()
        self.bProWithEn = (self._sRawInfo[5] == "e")
        self._tTags = _getTags(sVerbPattern)
        if not self._tTags:
            raise ValueError("Unknown verb.")
        self._tTagsAux = _getTags(self.sVerbAux)
        self.cGroup = self._sRawInfo[0]
        self.dConj = {
            ":Y": {
                "label": "Infinitif",
                ":": sVerb,
            },
            ":P": {
                "label": "Participe présent",
 | 
| ︙ | ︙ | |||
| 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | 
                sInfo = "# erreur - code : " + self._sRawInfo
            return sGroup + " · " + sInfo
        except:
            traceback.print_exc()
            return "# erreur"
    def infinitif (self, bPro, bNeg, bTpsCo, bInt, bFem):
        try:
            if bTpsCo:
                sInfi = self.sVerbAux  if not bPro  else  "être"
            else:
                sInfi = self.sVerb
            if bPro:
                if self.bProWithEn:
 | > | 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | 
                sInfo = "# erreur - code : " + self._sRawInfo
            return sGroup + " · " + sInfo
        except:
            traceback.print_exc()
            return "# erreur"
    def infinitif (self, bPro, bNeg, bTpsCo, bInt, bFem):
        "returns string (conjugaison à l’infinitif)"
        try:
            if bTpsCo:
                sInfi = self.sVerbAux  if not bPro  else  "être"
            else:
                sInfi = self.sVerb
            if bPro:
                if self.bProWithEn:
 | 
| ︙ | ︙ | |||
| 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 | 
                sInfi += " … ?"
            return sInfi
        except:
            traceback.print_exc()
            return "# erreur"
    def participePasse (self, sWho):
        try:
            return self.dConj[":Q"][sWho]
        except:
            traceback.print_exc()
            return "# erreur"
    def participePresent (self, bPro, bNeg, bTpsCo, bInt, bFem):
        try:
            if not self.dConj[":P"][":"]:
                return ""
            if bTpsCo:
                sPartPre = _getConjWithTags(self.sVerbAux, self._tTagsAux, ":PQ", ":P")  if not bPro  else  getConj("être", ":PQ", ":P")
            else:
                sPartPre = self.dConj[":P"][":"]
 | > > | 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 | 
                sInfi += " … ?"
            return sInfi
        except:
            traceback.print_exc()
            return "# erreur"
    def participePasse (self, sWho):
        "returns past participle according to <sWho>"
        try:
            return self.dConj[":Q"][sWho]
        except:
            traceback.print_exc()
            return "# erreur"
    def participePresent (self, bPro, bNeg, bTpsCo, bInt, bFem):
        "returns string (conjugaison du participe présent)"
        try:
            if not self.dConj[":P"][":"]:
                return ""
            if bTpsCo:
                sPartPre = _getConjWithTags(self.sVerbAux, self._tTagsAux, ":PQ", ":P")  if not bPro  else  getConj("être", ":PQ", ":P")
            else:
                sPartPre = self.dConj[":P"][":"]
 | 
| ︙ | ︙ | |||
| 346 347 348 349 350 351 352 353 354 355 356 357 358 359 | 
                sPartPre += " … ?"
            return sPartPre
        except:
            traceback.print_exc()
            return "# erreur"
    def conjugue (self, sTemps, sWho, bPro, bNeg, bTpsCo, bInt, bFem):
        try:
            if not self.dConj[sTemps][sWho]:
                return ""
            if not bTpsCo and bInt and sWho == ":1s" and self.dConj[sTemps].get(":1ś", False):
                sWho = ":1ś"
            if bTpsCo:
                sConj = _getConjWithTags(self.sVerbAux, self._tTagsAux, sTemps, sWho)  if not bPro  else  getConj("être", sTemps, sWho)
 | > | 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 | 
                sPartPre += " … ?"
            return sPartPre
        except:
            traceback.print_exc()
            return "# erreur"
    def conjugue (self, sTemps, sWho, bPro, bNeg, bTpsCo, bInt, bFem):
        "returns string (conjugue le verbe au temps <sTemps> pour <sWho>) "
        try:
            if not self.dConj[sTemps][sWho]:
                return ""
            if not bTpsCo and bInt and sWho == ":1s" and self.dConj[sTemps].get(":1ś", False):
                sWho = ":1ś"
            if bTpsCo:
                sConj = _getConjWithTags(self.sVerbAux, self._tTagsAux, sTemps, sWho)  if not bPro  else  getConj("être", sTemps, sWho)
 | 
| ︙ | ︙ | |||
| 368 369 370 371 372 373 374 | 
                else:
                    sConj = _dProObjEl[sWho] + "en " + sConj
            if bNeg:
                sConj = "n’" + sConj  if bEli and not bPro  else  "ne " + sConj
            if bInt:
                if sWho == ":3s" and not _zNeedTeuph.search(sConj):
                    sConj += "-t"
 | | | | > | 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 | 
                else:
                    sConj = _dProObjEl[sWho] + "en " + sConj
            if bNeg:
                sConj = "n’" + sConj  if bEli and not bPro  else  "ne " + sConj
            if bInt:
                if sWho == ":3s" and not _zNeedTeuph.search(sConj):
                    sConj += "-t"
                sConj += "-" + self._getPronomSujet(sWho, bFem)
            else:
                if sWho == ":1s" and bEli and not bNeg and not bPro:
                    sConj = "j’" + sConj
                else:
                    sConj = self._getPronomSujet(sWho, bFem) + " " + sConj
            if bNeg:
                sConj += " pas"
            if bTpsCo:
                sConj += " " + self._seekPpas(bPro, bFem, sWho.endswith("p") or self._sRawInfo[5] == "r")
            if bInt:
                sConj += " … ?"
            return sConj
        except:
            traceback.print_exc()
            return "# erreur"
    def _getPronomSujet (self, sWho, bFem):
        try:
            if sWho == ":3s":
                if self._sRawInfo[5] == "r":
                    return "on"
                elif bFem:
                    return "elle"
            elif sWho == ":3p" and bFem:
                return "elles"
            return _dProSuj[sWho]
        except:
            traceback.print_exc()
            return "# erreur"
    def imperatif (self, sWho, bPro, bNeg, bTpsCo, bFem):
        "returns string (conjugaison à l’impératif)"
        try:
            if not self.dConj[":E"][sWho]:
                return ""
            if bTpsCo:
                sImpe = _getConjWithTags(self.sVerbAux, self._tTagsAux, ":E", sWho)  if not bPro  else  getConj(u"être", ":E", sWho)
            else:
                sImpe = self.dConj[":E"][sWho]
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules/conj_generator.py from [2e696a65e3] to [ee0a228497].
| 
 | 
 | > | | > > | > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | 
"""
Conjugation generator
beta stage, unfinished, the root for a new way to generate flexions…
"""
import re
def conjugate (sVerb, sVerbTag="i_____a", bVarPpas=True):
    "conjugate <sVerb> and returns a list of tuples (conjugation form, tags)"
    lConj = []
    cGroup = getVerbGroupChar(sVerb)
    for nCut, sAdd, sFlexTags, sPattern in getConjRules(sVerb, bVarPpas):
        if not sPattern or re.search(sPattern, sVerb):
            sFlexion = sVerb[0:-nCut] + sAdd  if nCut  else sVerb + sAdd
            lConj.append((sFlexion, ":V" + cGroup + "_" + sVerbTag + sFlexTags))
    return lConj
def getVerbGroupChar (sVerb):
    "returns the group number of <sVerb> guessing on its ending"
    sVerb = sVerb.lower()
    if sVerb.endswith("er"):
        return "1"
    if sVerb.endswith("ir"):
        return "2"
    if sVerb == "être" or sVerb == "avoir":
        return "0"
    if sVerb.endswith("re"):
        return "3"
    return "4"
def getConjRules (sVerb, bVarPpas=True, nGroup=2):
    "returns a list of lists to conjugate a verb, guessing on its ending"
    if sVerb.endswith("er"):
        # premier groupe, conjugaison en fonction de la terminaison du lemme
        # 5 lettres
        if sVerb[-5:] in oConj["V1"]:
            lConj = list(oConj["V1"][sVerb[-5:]])
        # 4 lettres
        elif sVerb[-4:] in oConj["V1"]:
 | 
| ︙ | ︙ | |||
| 113 114 115 116 117 118 119 | 
        [2,     "isses",        ":Sp:Sq:2s/*",      False],
        [2,     "isse",         ":Sp:3s/*",         False],
        [2,     "ît",           ":Sq:3s/*",         False],
        [2,     "is",           ":E:2s/*",          False],
        [2,     "issons",       ":E:1p/*",          False],
        [2,     "issez",        ":E:2p/*",          False]
    ],
 | | | 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | 
        [2,     "isses",        ":Sp:Sq:2s/*",      False],
        [2,     "isse",         ":Sp:3s/*",         False],
        [2,     "ît",           ":Sq:3s/*",         False],
        [2,     "is",           ":E:2s/*",          False],
        [2,     "issons",       ":E:1p/*",          False],
        [2,     "issez",        ":E:2p/*",          False]
    ],
    # premier groupe (bien plus irrégulier que prétendu)
    "V1": {
        # a
        # verbes en -er, -ger, -yer, -cer
        "er": [
            [2,      "er",        ":Y/*",               False],
            [2,      "ant",       ":P/*",               False],
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules/cregex.py from [a0df0d1397] to [4b9e99ff72].
| 
 | 
 | > | > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | 
"""
Grammalecte - Compiled regular expressions
"""
import re
#### Lemme
Lemma = re.compile(r"^>(\w[\w-]*)")
#### Analyses
Gender = re.compile(":[mfe]")
Number = re.compile(":[spi]")
#### Nom et adjectif
NA = re.compile(":[NA]")
 | 
| ︙ | ︙ | |||
| 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | 
NPf = re.compile(":(?:M[12P]|T):f")
NPe = re.compile(":(?:M[12P]|T):e")
#### FONCTIONS
def getLemmaOfMorph (s):
    return Lemma.search(s).group(1)
def checkAgreement (l1, l2):
    # check number agreement
    if not mbInv(l1) and not mbInv(l2):
        if mbSg(l1) and not mbSg(l2):
            return False
        if mbPl(l1) and not mbPl(l2):
            return False
    # check gender agreement
    if mbEpi(l1) or mbEpi(l2):
        return True
    if mbMas(l1) and not mbMas(l2):
        return False
    if mbFem(l1) and not mbFem(l2):
        return False
    return True
def checkConjVerb (lMorph, sReqConj):
    return any(sReqConj in s  for s in lMorph)
def getGender (lMorph):
    "returns gender of word (':m', ':f', ':e' or empty string)."
    sGender = ""
    for sMorph in lMorph:
        m = Gender.search(sMorph)
        if m:
            if not sGender:
                sGender = m.group(0)
            elif sGender != m.group(0):
                return ":e"
    return sGender
def getNumber (lMorph):
    "returns number of word (':s', ':p', ':i' or empty string)."
    sNumber = ""
    for sMorph in lMorph:
 | > > > | > > > > > > > > > > > > > > > > | | | > > > > > > > > > > > > | 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | 
NPf = re.compile(":(?:M[12P]|T):f")
NPe = re.compile(":(?:M[12P]|T):e")
#### FONCTIONS
def getLemmaOfMorph (s):
    "return lemma in morphology <s>"
    return Lemma.search(s).group(1)
def checkAgreement (l1, l2):
    "returns True if agreement in gender and number is possible between morphologies <l1> and <l2>"
    # check number agreement
    if not mbInv(l1) and not mbInv(l2):
        if mbSg(l1) and not mbSg(l2):
            return False
        if mbPl(l1) and not mbPl(l2):
            return False
    # check gender agreement
    if mbEpi(l1) or mbEpi(l2):
        return True
    if mbMas(l1) and not mbMas(l2):
        return False
    if mbFem(l1) and not mbFem(l2):
        return False
    return True
def checkConjVerb (lMorph, sReqConj):
    "returns True if <sReqConj> in <lMorph>"
    return any(sReqConj in s  for s in lMorph)
def getGender (lMorph):
    "returns gender of word (':m', ':f', ':e' or empty string)."
    sGender = ""
    for sMorph in lMorph:
        m = Gender.search(sMorph)
        if m:
            if not sGender:
                sGender = m.group(0)
            elif sGender != m.group(0):
                return ":e"
    return sGender
def getNumber (lMorph):
    "returns number of word (':s', ':p', ':i' or empty string)."
    sNumber = ""
    for sMorph in lMorph:
        m = Number.search(sMorph)
        if m:
            if not sNumber:
                sNumber = m.group(0)
            elif sNumber != m.group(0):
                return ":i"
    return sNumber
# NOTE :  isWhat (lMorph)    returns True   if lMorph contains nothing else than What
#         mbWhat (lMorph)    returns True   if lMorph contains What at least once
## isXXX = it’s certain
def isNom (lMorph):
    "returns True if all morphologies are “nom”"
    return all(":N" in s  for s in lMorph)
def isNomNotAdj (lMorph):
    "returns True if all morphologies are “nom”, but not “adjectif”"
    return all(NnotA.search(s)  for s in lMorph)
def isAdj (lMorph):
    "returns True if all morphologies are “adjectif”"
    return all(":A" in s  for s in lMorph)
def isNomAdj (lMorph):
    "returns True if all morphologies are “nom” or “adjectif”"
    return all(NA.search(s)  for s in lMorph)
def isNomVconj (lMorph):
    "returns True if all morphologies are “nom” or “verbe conjugué”"
    return all(NVconj.search(s)  for s in lMorph)
def isInv (lMorph):
    "returns True if all morphologies are “invariable”"
    return all(":i" in s  for s in lMorph)
def isSg (lMorph):
    "returns True if all morphologies are “singulier”"
    return all(":s" in s  for s in lMorph)
def isPl (lMorph):
    "returns True if all morphologies are “pluriel”"
    return all(":p" in s  for s in lMorph)
def isEpi (lMorph):
    "returns True if all morphologies are “épicène”"
    return all(":e" in s  for s in lMorph)
def isMas (lMorph):
    "returns True if all morphologies are “masculin”"
    return all(":m" in s  for s in lMorph)
def isFem (lMorph):
    "returns True if all morphologies are “féminin”"
    return all(":f" in s  for s in lMorph)
## mbXXX = MAYBE XXX
def mbNom (lMorph):
    "returns True if one morphology is “nom”"
    return any(":N" in s  for s in lMorph)
def mbAdj (lMorph):
    "returns True if one morphology is “adjectif”"
    return any(":A" in s  for s in lMorph)
def mbAdjNb (lMorph):
    "returns True if one morphology is “adjectif” or “nombre”"
    return any(AD.search(s)  for s in lMorph)
def mbNomAdj (lMorph):
    "returns True if one morphology is “nom” or “adjectif”"
    return any(NA.search(s)  for s in lMorph)
def mbNomNotAdj (lMorph):
    "returns True if one morphology is “nom”, but not “adjectif”"
    bResult = False
    for s in lMorph:
        if ":A" in s:
            return False
        if ":N" in s:
            bResult = True
    return bResult
def mbPpasNomNotAdj (lMorph):
    "returns True if one morphology is “nom” or “participe passé”, but not “adjectif”"
    return any(PNnotA.search(s)  for s in lMorph)
def mbVconj (lMorph):
    "returns True if one morphology is “nom” or “verbe conjugué”"
    return any(Vconj.search(s)  for s in lMorph)
def mbVconj123 (lMorph):
    "returns True if one morphology is “nom” or “verbe conjugué” (but not “avoir” or “être”)"
    return any(Vconj123.search(s)  for s in lMorph)
def mbMG (lMorph):
    "returns True if one morphology is “mot grammatical”"
    return any(":G" in s  for s in lMorph)
def mbInv (lMorph):
    "returns True if one morphology is “invariable”"
    return any(":i" in s  for s in lMorph)
def mbSg (lMorph):
    "returns True if one morphology is “singulier”"
    return any(":s" in s  for s in lMorph)
def mbPl (lMorph):
    "returns True if one morphology is “pluriel”"
    return any(":p" in s  for s in lMorph)
def mbEpi (lMorph):
    "returns True if one morphology is “épicène”"
    return any(":e" in s  for s in lMorph)
def mbMas (lMorph):
    "returns True if one morphology is “masculin”"
    return any(":m" in s  for s in lMorph)
def mbFem (lMorph):
    "returns True if one morphology is “féminin”"
    return any(":f" in s  for s in lMorph)
def mbNpr (lMorph):
    "returns True if one morphology is “nom propre” or “titre de civilité”"
    return any(NP.search(s)  for s in lMorph)
def mbNprMasNotFem (lMorph):
    "returns True if one morphology is “nom propre masculin” but not “féminin”"
    if any(NPf.search(s)  for s in lMorph):
        return False
    return any(NPm.search(s)  for s in lMorph)
 | 
Modified gc_lang/fr/modules/gce_analyseur.py from [39975de0ac] to [7ecf93be16].
| 1 2 3 4 5 6 | #### GRAMMAR CHECKING ENGINE PLUGIN: Parsing functions for French language from . import cregex as cr def rewriteSubject (s1, s2): | > > > > > > > | < | < | | < | | | < | | | < | | | | | > > > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | 
#### GRAMMAR CHECKING ENGINE PLUGIN: Parsing functions for French language
from . import cregex as cr
def g_morphVC (dToken, sPattern, sNegPattern=""):
    nEnd = dToken["sValue"].rfind("-")
    if "-t-" in dToken["sValue"]:
        nEnd = nEnd - 2
    return g_morph(dToken, sPattern, sNegPattern, 0, nEnd, False)
def rewriteSubject (s1, s2):
    "rewrite complex subject: <s1> a prn/patr/npr (M[12P]) followed by “et” and <s2>"
    if s2 == "lui":
        return "ils"
    if s2 == "moi":
        return "nous"
    if s2 == "toi":
        return "vous"
    if s2 == "nous":
        return "nous"
    if s2 == "vous":
        return "vous"
    if s2 == "eux":
        return "ils"
    if s2 == "elle" or s2 == "elles":
        if cr.mbNprMasNotFem(_oSpellChecker.getMorph(s1)):
            return "ils"
        # si épicène, indéterminable, mais OSEF, le féminin l’emporte
        return "elles"
    return s1 + " et " + s2
def apposition (sWord1, sWord2):
    "returns True if nom + nom (no agreement required)"
    return len(sWord2) < 2 or (cr.mbNomNotAdj(_oSpellChecker.getMorph(sWord2)) and cr.mbPpasNomNotAdj(_oSpellChecker.getMorph(sWord1)))
def isAmbiguousNAV (sWord):
    "words which are nom|adj and verb are ambiguous (except être and avoir)"
    lMorph = _oSpellChecker.getMorph(sWord)
    if not cr.mbNomAdj(lMorph) or sWord == "est":
        return False
    if cr.mbVconj(lMorph) and not cr.mbMG(lMorph):
        return True
    return False
def isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj):
    "use it if <sWord1> won’t be a verb; <sWord2> is assumed to be True via isAmbiguousNAV"
    a2 = _oSpellChecker.getMorph(sWord2)
    if not a2:
        return False
    if cr.checkConjVerb(a2, sReqMorphConj):
        # verb word2 is ok
        return False
    a1 = _oSpellChecker.getMorph(sWord1)
    if not a1:
        return False
    if cr.checkAgreement(a1, a2) and (cr.mbAdj(a2) or cr.mbAdj(a1)):
        return False
    return True
def isVeryAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj, bLastHopeCond):
    "use it if <sWord1> can be also a verb; <sWord2> is assumed to be True via isAmbiguousNAV"
    a2 = _oSpellChecker.getMorph(sWord2)
    if not a2:
        return False
    if cr.checkConjVerb(a2, sReqMorphConj):
        # verb word2 is ok
        return False
    a1 = _oSpellChecker.getMorph(sWord1)
    if not a1:
        return False
    if cr.checkAgreement(a1, a2) and (cr.mbAdj(a2) or cr.mbAdjNb(a1)):
        return False
    # now, we know there no agreement, and conjugation is also wrong
    if cr.isNomAdj(a1):
        return True
    #if cr.isNomAdjVerb(a1): # considered True
    if bLastHopeCond:
        return True
    return False
def checkAgreement (sWord1, sWord2):
    "check agreement between <sWord1> and <sWord1>"
    a2 = _oSpellChecker.getMorph(sWord2)
    if not a2:
        return True
    a1 = _oSpellChecker.getMorph(sWord1)
    if not a1:
        return True
    return cr.checkAgreement(a1, a2)
_zUnitSpecial = re.compile("[µ/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]")
_zUnitNumbers = re.compile("[0-9]")
def mbUnit (s):
    "returns True it can be a measurement unit"
    if _zUnitSpecial.search(s):
        return True
    if 1 < len(s) < 16 and s[0:1].islower() and (not s[1:].islower() or _zUnitNumbers.search(s)):
        return True
    return False
#### Syntagmes
_zEndOfNG1 = re.compile(" *$| +(?:, +|)(?:n(?:’|e |o(?:u?s|tre) )|l(?:’|e(?:urs?|s|) |a )|j(?:’|e )|m(?:’|es? |a |on )|t(?:’|es? |a |u )|s(?:’|es? |a )|c(?:’|e(?:t|tte|s|) )|ç(?:a |’)|ils? |vo(?:u?s|tre) )")
_zEndOfNG2 = re.compile(r" +(\w[\w-]+)")
_zEndOfNG3 = re.compile(r" *, +(\w[\w-]+)")
def isEndOfNG (dDA, s, iOffset):
    "returns True if next word doesn’t belong to a noun group"
    if _zEndOfNG1.match(s):
        return True
    m = _zEndOfNG2.match(s)
    if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[VR]", ":[NAQP]"):
        return True
    m = _zEndOfNG3.match(s)
    if m and not morph(dDA, (iOffset+m.start(1), m.group(1)), ":[NA]", False):
        return True
    return False
_zNextIsNotCOD1 = re.compile(" *,")
_zNextIsNotCOD2 = re.compile(" +(?:[mtsnj](e +|’)|[nv]ous |tu |ils? |elles? )")
_zNextIsNotCOD3 = re.compile(r" +([a-zéèî][\w-]+)")
def isNextNotCOD (dDA, s, iOffset):
    "returns True if next word is not a COD"
    if _zNextIsNotCOD1.match(s) or _zNextIsNotCOD2.match(s):
        return True
    m = _zNextIsNotCOD3.match(s)
    if m and morphex(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", ":[DM]"):
        return True
    return False
_zNextIsVerb1 = re.compile(" +[nmts](?:e |’)")
_zNextIsVerb2 = re.compile(r" +(\w[\w-]+)")
def isNextVerb (dDA, s, iOffset):
    "returns True if next word is a verb"
    if _zNextIsVerb1.match(s):
        return True
    m = _zNextIsVerb2.match(s)
    if m and morph(dDA, (iOffset+m.start(1), m.group(1)), ":[123][sp]", False):
        return True
    return False
#### Exceptions
aREGULARPLURAL = frozenset(["abricot", "amarante", "aubergine", "acajou", "anthracite", "brique", "caca", "café", \
                            "carotte", "cerise", "chataigne", "corail", "citron", "crème", "grave", "groseille", \
                            "jonquille", "marron", "olive", "pervenche", "prune", "sable"])
aSHOULDBEVERB = frozenset(["aller", "manger"])
 | 
Modified gc_lang/fr/modules/gce_date_verif.py from [1265100649] to [c3f0a9eb2f].
| 1 2 3 4 5 6 7 8 9 10 | 
#### GRAMMAR CHECKING ENGINE PLUGIN
#### Check date validity
_lDay = ["lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche"]
_dMonth = { "janvier":1, "février":2, "mars":3, "avril":4, "mai":5, "juin":6, "juillet":7, "août":8, "aout":8, "septembre":9, "octobre":10, "novembre":11, "décembre":12 }
import datetime
 | | | | | | | | | | | > | | | | | | | | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | 
#### GRAMMAR CHECKING ENGINE PLUGIN
#### Check date validity
_lDay = ["lundi", "mardi", "mercredi", "jeudi", "vendredi", "samedi", "dimanche"]
_dMonth = { "janvier":1, "février":2, "mars":3, "avril":4, "mai":5, "juin":6, "juillet":7, "août":8, "aout":8, "septembre":9, "octobre":10, "novembre":11, "décembre":12 }
import datetime
def checkDate (sDay, sMonth, sYear):
    "to use if <sMonth> is a number"
    try:
        return datetime.date(int(sYear), int(sMonth), int(sDay))
    except ValueError:
        return False
    except:
        return True
def checkDateWithString (sDay, sMonth, sYear):
    "to use if <sMonth> is a noun"
    try:
        return datetime.date(int(sYear), _dMonth.get(sMonth.lower(), ""), int(sDay))
    except ValueError:
        return False
    except:
        return True
def checkDay (sWeekday, sDay, sMonth, sYear):
    "to use if <sMonth> is a number"
    oDate = checkDate(sDay, sMonth, sYear)
    if oDate and _lDay[oDate.weekday()] != sWeekday.lower():
        return False
    return True
def checkDayWithString (sWeekday, sDay, sMonth, sYear):
    "to use if <sMonth> is a noun"
    oDate = checkDate(sDay, _dMonth.get(sMonth, ""), sYear)
    if oDate and _lDay[oDate.weekday()] != sWeekday.lower():
        return False
    return True
def getDay (sDay, sMonth, sYear):
    "to use if <sMonth> is a number"
    return _lDay[datetime.date(int(sYear), int(sMonth), int(sDay)).weekday()]
def getDayWithString (sDay, sMonth, sYear):
    "to use if <sMonth> is a noun"
    return _lDay[datetime.date(int(sYear), _dMonth.get(sMonth.lower(), ""), int(sDay)).weekday()]
 | 
Modified gc_lang/fr/modules/gce_suggestions.py from [79835965e4] to [2926468975].
| 1 2 3 4 5 6 7 8 9 10 11 | 
#### GRAMMAR CHECKING ENGINE PLUGIN: Suggestion mechanisms
from . import conj
from . import mfsp
from . import phonet
## Verbs
def suggVerb (sFlex, sWho, funcSugg2=None):
    aSugg = set()
 | > | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | 
#### GRAMMAR CHECKING ENGINE PLUGIN: Suggestion mechanisms
from . import conj
from . import mfsp
from . import phonet
## Verbs
def suggVerb (sFlex, sWho, funcSugg2=None):
    "change <sFlex> conjugation according to <sWho>"
    aSugg = set()
    for sStem in _oSpellChecker.getLemma(sFlex):
        tTags = conj._getTags(sStem)
        if tTags:
            # we get the tense
            aTense = set()
            for sMorph in _oSpellChecker.getMorph(sFlex):
                for m in re.finditer(">"+sStem+"/.*?(:(?:Y|I[pqsf]|S[pq]|K|P))", sMorph):
                    # stem must be used in regex to prevent confusion between different verbs (e.g. sauras has 2 stems: savoir and saurer)
                    if m:
                        if m.group(1) == ":Y":
                            aTense.add(":Ip")
                            aTense.add(":Iq")
                            aTense.add(":Is")
                        elif m.group(1) == ":P":
 | 
| ︙ | ︙ | |||
| 36 37 38 39 40 41 42 | 
        if aSugg2:
            aSugg.add(aSugg2)
    if aSugg:
        return "|".join(aSugg)
    return ""
 | | > | | | | | | | | > | > | > | > | > | | | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | 
        if aSugg2:
            aSugg.add(aSugg2)
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggVerbPpas (sFlex, sPattern=None):
    "suggest past participles for <sFlex>"
    aSugg = set()
    for sStem in _oSpellChecker.getLemma(sFlex):
        tTags = conj._getTags(sStem)
        if tTags:
            if not sPattern:
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"))
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
                aSugg.discard("")
            elif sPattern == ":m:s":
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
            elif sPattern == ":m:p":
                if conj._hasConjWithTags(tTags, ":PQ", ":Q2"):
                    aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"))
                else:
                    aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
            elif sPattern == ":f:s":
                if conj._hasConjWithTags(tTags, ":PQ", ":Q3"):
                    aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
                else:
                    aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
            elif sPattern == ":f:p":
                if conj._hasConjWithTags(tTags, ":PQ", ":Q4"):
                    aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
                else:
                    aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
            elif sPattern == ":s":
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
                aSugg.discard("")
            elif sPattern == ":p":
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"))
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
                aSugg.discard("")
            else:
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggVerbTense (sFlex, sTense, sWho):
    "change <sFlex> to a verb according to <sTense> and <sWho>"
    aSugg = set()
    for sStem in _oSpellChecker.getLemma(sFlex):
        if conj.hasConj(sStem, sTense, sWho):
            aSugg.add(conj.getConj(sStem, sTense, sWho))
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggVerbImpe (sFlex):
    "change <sFlex> to a verb at imperative form"
    aSugg = set()
    for sStem in _oSpellChecker.getLemma(sFlex):
        tTags = conj._getTags(sStem)
        if tTags:
            if conj._hasConjWithTags(tTags, ":E", ":2s"):
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2s"))
            if conj._hasConjWithTags(tTags, ":E", ":1p"):
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":1p"))
            if conj._hasConjWithTags(tTags, ":E", ":2p"):
                aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2p"))
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggVerbInfi (sFlex):
    "returns infinitive forms of <sFlex>"
    return "|".join([ sStem  for sStem in _oSpellChecker.getLemma(sFlex)  if conj.isVerb(sStem) ])
_dQuiEst = { "je": ":1s", "j’": ":1s", "j’en": ":1s", "j’y": ":1s", \
             "tu": ":2s", "il": ":3s", "on": ":3s", "elle": ":3s", "nous": ":1p", "vous": ":2p", "ils": ":3p", "elles": ":3p" }
_lIndicatif = [":Ip", ":Iq", ":Is", ":If"]
_lSubjonctif = [":Sp", ":Sq"]
def suggVerbMode (sFlex, cMode, sSuj):
    "returns other conjugations of <sFlex> acconding to <cMode> and <sSuj>"
    if cMode == ":I":
        lMode = _lIndicatif
    elif cMode == ":S":
        lMode = _lSubjonctif
    elif cMode.startswith((":I", ":S")):
        lMode = [cMode]
    else:
        return ""
    sWho = _dQuiEst.get(sSuj.lower(), None)
    if not sWho:
        if sSuj[0:1].islower(): # pas un pronom, ni un nom propre
            return ""
        sWho = ":3s"
    aSugg = set()
    for sStem in _oSpellChecker.getLemma(sFlex):
        tTags = conj._getTags(sStem)
        if tTags:
            for sTense in lMode:
                if conj._hasConjWithTags(tTags, sTense, sWho):
                    aSugg.add(conj._getConjWithTags(sStem, tTags, sTense, sWho))
    if aSugg:
        return "|".join(aSugg)
    return ""
## Nouns and adjectives
def suggPlur (sFlex, sWordToAgree=None):
    "returns plural forms assuming sFlex is singular"
    if sWordToAgree:
        lMorph = _oSpellChecker.getMorph(sFlex)
        if not lMorph:
            return ""
        sGender = cr.getGender(lMorph)
        if sGender == ":m":
            return suggMasPlur(sFlex)
        elif sGender == ":f":
            return suggFemPlur(sFlex)
    aSugg = set()
    if "-" not in sFlex:
        if sFlex.endswith("l"):
 | 
| ︙ | ︙ | |||
| 189 190 191 192 193 194 195 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggMasSing (sFlex, bSuggSimil=False):
    "returns masculine singular forms"
 | < | | 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggMasSing (sFlex, bSuggSimil=False):
    "returns masculine singular forms"
    aSugg = set()
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if not ":V" in sMorph:
            # not a verb
            if ":m" in sMorph or ":e" in sMorph:
                aSugg.add(suggSing(sFlex))
            else:
                sStem = cr.getLemmaOfMorph(sMorph)
                if mfsp.isFemForm(sStem):
 | 
| ︙ | ︙ | |||
| 217 218 219 220 221 222 223 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggMasPlur (sFlex, bSuggSimil=False):
    "returns masculine plural forms"
 | < | | 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggMasPlur (sFlex, bSuggSimil=False):
    "returns masculine plural forms"
    aSugg = set()
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if not ":V" in sMorph:
            # not a verb
            if ":m" in sMorph or ":e" in sMorph:
                aSugg.add(suggPlur(sFlex))
            else:
                sStem = cr.getLemmaOfMorph(sMorph)
                if mfsp.isFemForm(sStem):
 | 
| ︙ | ︙ | |||
| 248 249 250 251 252 253 254 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggFemSing (sFlex, bSuggSimil=False):
    "returns feminine singular forms"
 | < | | 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggFemSing (sFlex, bSuggSimil=False):
    "returns feminine singular forms"
    aSugg = set()
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if not ":V" in sMorph:
            # not a verb
            if ":f" in sMorph or ":e" in sMorph:
                aSugg.add(suggSing(sFlex))
            else:
                sStem = cr.getLemmaOfMorph(sMorph)
                if mfsp.isFemForm(sStem):
 | 
| ︙ | ︙ | |||
| 274 275 276 277 278 279 280 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggFemPlur (sFlex, bSuggSimil=False):
    "returns feminine plural forms"
 | < | | 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 | 
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggFemPlur (sFlex, bSuggSimil=False):
    "returns feminine plural forms"
    aSugg = set()
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if not ":V" in sMorph:
            # not a verb
            if ":f" in sMorph or ":e" in sMorph:
                aSugg.add(suggPlur(sFlex))
            else:
                sStem = cr.getLemmaOfMorph(sMorph)
                if mfsp.isFemForm(sStem):
 | 
| ︙ | ︙ | |||
| 299 300 301 302 303 304 305 | 
            aSugg.add(e)
    if aSugg:
        return "|".join(aSugg)
    return ""
def hasFemForm (sFlex):
 | > | > | | | | | | | > < | > | | > | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | 
            aSugg.add(e)
    if aSugg:
        return "|".join(aSugg)
    return ""
def hasFemForm (sFlex):
    "return True if there is a feminine form of <sFlex>"
    for sStem in _oSpellChecker.getLemma(sFlex):
        if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q3"):
            return True
    if phonet.hasSimil(sFlex, ":f"):
        return True
    return False
def hasMasForm (sFlex):
    "return True if there is a masculine form of <sFlex>"
    for sStem in _oSpellChecker.getLemma(sFlex):
        if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q1"):
            # what has a feminine form also has a masculine form
            return True
    if phonet.hasSimil(sFlex, ":m"):
        return True
    return False
def switchGender (sFlex, bPlur=None):
    "return feminine or masculine form(s) of <sFlex>"
    aSugg = set()
    if bPlur == None:
        for sMorph in _oSpellChecker.getMorph(sFlex):
            if ":f" in sMorph:
                if ":s" in sMorph:
                    aSugg.add(suggMasSing(sFlex))
                elif ":p" in sMorph:
                    aSugg.add(suggMasPlur(sFlex))
            elif ":m" in sMorph:
                if ":s" in sMorph:
                    aSugg.add(suggFemSing(sFlex))
                elif ":p" in sMorph:
                    aSugg.add(suggFemPlur(sFlex))
                else:
                    aSugg.add(suggFemSing(sFlex))
                    aSugg.add(suggFemPlur(sFlex))
    elif bPlur:
        for sMorph in _oSpellChecker.getMorph(sFlex):
            if ":f" in sMorph:
                aSugg.add(suggMasPlur(sFlex))
            elif ":m" in sMorph:
                aSugg.add(suggFemPlur(sFlex))
    else:
        for sMorph in _oSpellChecker.getMorph(sFlex):
            if ":f" in sMorph:
                aSugg.add(suggMasSing(sFlex))
            elif ":m" in sMorph:
                aSugg.add(suggFemSing(sFlex))
    if aSugg:
        return "|".join(aSugg)
    return ""
def switchPlural (sFlex):
    "return plural or singular form(s) of <sFlex>"
    aSugg = set()
    for sMorph in _oSpellChecker.getMorph(sFlex):
        if ":s" in sMorph:
            aSugg.add(suggPlur(sFlex))
        elif ":p" in sMorph:
            aSugg.add(suggSing(sFlex))
    if aSugg:
        return "|".join(aSugg)
    return ""
def hasSimil (sWord, sPattern=None):
    "return True if there is words phonetically similar to <sWord> (according to <sPattern> if required)"
    return phonet.hasSimil(sWord, sPattern)
def suggSimil (sWord, sPattern=None, bSubst=False):
    "return list of words phonetically similar to sWord and whom POS is matching sPattern"
    aSugg = phonet.selectSimil(sWord, sPattern)
    for sMorph in _oSpellChecker.getMorph(sWord):
        aSugg.update(conj.getSimil(sWord, sMorph, bSubst))
        break
    if aSugg:
        return "|".join(aSugg)
    return ""
def suggCeOrCet (sWord):
    "suggest “ce” or “cet” or both according to the first letter of <sWord>"
    if re.match("(?i)[aeéèêiouyâîï]", sWord):
        return "cet"
    if sWord[0:1] == "h" or sWord[0:1] == "H":
        return "ce|cet"
    return "ce"
def suggLesLa (sWord):
    "suggest “les” or “la” according to <sWord>"
    if any( ":p" in sMorph  for sMorph in _oSpellChecker.getMorph(sWord) ):
        return "les|la"
    return "la"
_zBinary = re.compile("^[01]+$")
def formatNumber (s):
    "add spaces or hyphens to big numbers"
    nLen = len(s)
    if nLen < 4:
        return s
    sRes = ""
    # nombre ordinaire
    nEnd = nLen
    while nEnd > 0:
 | 
| ︙ | ︙ | |||
| 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 | 
    elif nLen == 9 and s.startswith("0"):
        sRes += "|" + s[0:3] + " " + s[3:5] + " " + s[5:7] + " " + s[7:9]                   # fixe belge 1
        sRes += "|" + s[0:2] + " " + s[2:5] + " " + s[5:7] + " " + s[7:9]                   # fixe belge 2
    return sRes
def formatNF (s):
    try:
        m = re.match("NF[  -]?(C|E|P|Q|S|X|Z|EN(?:[  -]ISO|))[  -]?([0-9]+(?:[/‑-][0-9]+|))", s)
        if not m:
            return ""
        return "NF " + m.group(1).upper().replace(" ", " ").replace("-", " ") + " " + m.group(2).replace("/", "‑").replace("-", "‑")
    except:
        traceback.print_exc()
        return "# erreur #"
def undoLigature (c):
    if c == "fi":
        return "fi"
    elif c == "fl":
        return "fl"
    elif c == "ff":
        return "ff"
    elif c == "ffi":
 | > > | 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 | 
    elif nLen == 9 and s.startswith("0"):
        sRes += "|" + s[0:3] + " " + s[3:5] + " " + s[5:7] + " " + s[7:9]                   # fixe belge 1
        sRes += "|" + s[0:2] + " " + s[2:5] + " " + s[5:7] + " " + s[7:9]                   # fixe belge 2
    return sRes
def formatNF (s):
    "typography: format NF reference (norme française)"
    try:
        m = re.match("NF[  -]?(C|E|P|Q|S|X|Z|EN(?:[  -]ISO|))[  -]?([0-9]+(?:[/‑-][0-9]+|))", s)
        if not m:
            return ""
        return "NF " + m.group(1).upper().replace(" ", " ").replace("-", " ") + " " + m.group(2).replace("/", "‑").replace("-", "‑")
    except:
        traceback.print_exc()
        return "# erreur #"
def undoLigature (c):
    "typography: split ligature character <c> in several chars"
    if c == "fi":
        return "fi"
    elif c == "fl":
        return "fl"
    elif c == "ff":
        return "ff"
    elif c == "ffi":
 | 
| ︙ | ︙ | |||
| 468 469 470 471 472 473 474 | 
_xNormalizedCharsForInclusiveWriting = str.maketrans({
    '(': '_',  ')': '_',
    '.': '_',  '·': '_',
    '–': '_',  '—': '_',
    '/': '_'
 | | > | 477 478 479 480 481 482 483 484 485 486 487 488 489 | 
_xNormalizedCharsForInclusiveWriting = str.maketrans({
    '(': '_',  ')': '_',
    '.': '_',  '·': '_',
    '–': '_',  '—': '_',
    '/': '_'
})
def normalizeInclusiveWriting (sToken):
    "typography: replace word separators used in inclusive writing by underscore (_)"
    return sToken.translate(_xNormalizedCharsForInclusiveWriting)
 | 
Modified gc_lang/fr/modules/lexicographe.py from [5e53113f51] to [175c38852d].
| 
 | 
 | > | > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | 
"""
Grammalecte - Lexicographe
"""
# License: MPL 2
import re
import traceback
_dTAGS = {
    ':N': (" nom,", "Nom"),
    ':A': (" adjectif,", "Adjectif"),
    ':M1': (" prénom,", "Prénom"),
    ':M2': (" patronyme,", "Patronyme, matronyme, nom de famille…"),
    ':MP': (" nom propre,", "Nom propre"),
    ':W': (" adverbe,", "Adverbe"),
    ':J': (" interjection,", "Interjection"),
 | 
| ︙ | ︙ | |||
| 76 77 78 79 80 81 82 | 
    ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"),
    ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"),
    ':C': (" conjonction,", "Conjonction"),
    ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"),
    ':Cc': (" conjonction de coordination,", "Conjonction de coordination"),
    ':Cs': (" conjonction de subordination,", "Conjonction de subordination"),
    ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"),
 | | | 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | 
    ':O2': (" 2ᵉ pers.,", "Pronom : 2ᵉ personne"),
    ':O3': (" 3ᵉ pers.,", "Pronom : 3ᵉ personne"),
    ':C': (" conjonction,", "Conjonction"),
    ':Ĉ': (" conjonction (él.),", "Conjonction (élément)"),
    ':Cc': (" conjonction de coordination,", "Conjonction de coordination"),
    ':Cs': (" conjonction de subordination,", "Conjonction de subordination"),
    ':Ĉs': (" conjonction de subordination (él.),", "Conjonction de subordination (élément)"),
    ':Ñ': (" locution nominale (él.),", "Locution nominale (élément)"),
    ':Â': (" locution adjectivale (él.),", "Locution adjectivale (élément)"),
    ':Ṽ': (" locution verbale (él.),", "Locution verbale (élément)"),
    ':Ŵ': (" locution adverbiale (él.),", "Locution adverbiale (élément)"),
    ':Ŕ': (" locution prépositive (él.),", "Locution prépositive (élément)"),
    ':Ĵ': (" locution interjective (él.),", "Locution interjective (élément)"),
 | 
| ︙ | ︙ | |||
| 123 124 125 126 127 128 129 | 
    'il': " pronom personnel sujet, 3ᵉ pers. masc. sing.",
    'on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.",
    'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.",
    'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.",
    'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.",
    'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.",
    'elles': " pronom personnel sujet, 3ᵉ pers. masc. plur.",
 | | | | > > | 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | 
    'il': " pronom personnel sujet, 3ᵉ pers. masc. sing.",
    'on': " pronom personnel sujet, 3ᵉ pers. sing. ou plur.",
    'elle': " pronom personnel sujet, 3ᵉ pers. fém. sing.",
    'nous': " pronom personnel sujet/objet, 1ʳᵉ pers. plur.",
    'vous': " pronom personnel sujet/objet, 2ᵉ pers. plur.",
    'ils': " pronom personnel sujet, 3ᵉ pers. masc. plur.",
    'elles': " pronom personnel sujet, 3ᵉ pers. masc. plur.",
    "là": " particule démonstrative",
    "ci": " particule démonstrative",
    'le': " COD, masc. sing.",
    'la': " COD, fém. sing.",
    'les': " COD, plur.",
    'moi': " COI (à moi), sing.",
    'toi': " COI (à toi), sing.",
    'lui': " COI (à lui ou à elle), sing.",
    'nous2': " COI (à nous), plur.",
    'vous2': " COI (à vous), plur.",
    'leur': " COI (à eux ou à elles), plur.",
    'y': " pronom adverbial",
    "m'y": " (me) pronom personnel objet + (y) pronom adverbial",
    "t'y": " (te) pronom personnel objet + (y) pronom adverbial",
    "s'y": " (se) pronom personnel objet + (y) pronom adverbial",
    'en': " pronom adverbial",
    "m'en": " (me) pronom personnel objet + (en) pronom adverbial",
    "t'en": " (te) pronom personnel objet + (en) pronom adverbial",
    "s'en": " (se) pronom personnel objet + (en) pronom adverbial",
}
class Lexicographe:
    "Lexicographer - word analyzer"
    def __init__ (self, oSpellChecker):
        self.oSpellChecker = oSpellChecker
        self._zElidedPrefix = re.compile("(?i)^([dljmtsncç]|quoiqu|lorsqu|jusqu|puisqu|qu)['’](.+)")
        self._zCompoundWord = re.compile("(?i)(\\w+)-((?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts][’'](?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous)$")
        self._zTag = re.compile("[:;/][\\w*][^:;/]*")
    def analyzeWord (self, sWord):
        "returns a tuple (a list of morphologies, a set of verb at infinitive form)"
        try:
            if not sWord:
                return (None, None)
            if sWord.count("-") > 4:
                return (["élément complexe indéterminé"], None)
            if sWord.isdigit():
                return (["nombre"], None)
 | 
| ︙ | ︙ | |||
| 190 191 192 193 194 195 196 | 
                aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
            else:
                aMorph.append( "{} :  inconnu du dictionnaire".format(sWord) )
            # suffixe d’un mot composé
            if m2:
                aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
            # Verbes
 | | > | 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | 
                aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
            else:
                aMorph.append( "{} :  inconnu du dictionnaire".format(sWord) )
            # suffixe d’un mot composé
            if m2:
                aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
            # Verbes
            aVerb = set([ s[1:s.find("/")]  for s in lMorph  if ":V" in s ])
            return (aMorph, aVerb)
        except:
            traceback.print_exc()
            return (["#erreur"], None)
    def formatTags (self, sTags):
        "returns string: readable tags"
        sRes = ""
        sTags = re.sub("(?<=V[1-3])[itpqnmr_eaxz]+", "", sTags)
        sTags = re.sub("(?<=V0[ea])[itpqnmr_eaxz]+", "", sTags)
        for m in self._zTag.finditer(sTags):
            sRes += _dTAGS.get(m.group(0), " [{}]".format(m.group(0)))[0]
        if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
            sRes += " [{}]".format(sTags[1:sTags.find(" ")])
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules/mfsp.py from [3f4814b5d6] to [8b7759e076].
| 
 | 
 | > | > | 1 2 3 4 5 6 7 8 9 10 | """ Masculins, féminins, singuliers et pluriels """ from .mfsp_data import lTagMiscPlur as _lTagMiscPlur from .mfsp_data import lTagMasForm as _lTagMasForm from .mfsp_data import dMiscPlur as _dMiscPlur from .mfsp_data import dMasForm as _dMasForm | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules/phonet.py from [cc107e0763] to [df9f884192].
| 
 | 
 | > | > > | 1 2 3 4 5 6 7 8 9 10 11 | """ Grammalecte - Suggestion phonétique """ # License: GPL 3 import re from .phonet_data import dWord as _dWord from .phonet_data import lSet as _lSet from .phonet_data import dMorph as _dMorph | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules/tests.py from [2e6f413e05] to [d2c5da43dc].
| 1 | #! python3 | | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 | #! python3 """ Grammar checker tests for French language """ import unittest import os import re import time | 
| ︙ | ︙ | |||
| 143 144 145 146 147 148 149 | 
                        sExceptedSuggs = sExceptedSuggs[1:-1]
                else:
                    sErrorText = sLine.strip()
                    sExceptedSuggs = ""
                sExpectedErrors = self._getExpectedErrors(sErrorText)
                sTextToCheck = sErrorText.replace("}}", "").replace("{{", "")
                sFoundErrors, sListErr, sFoundSuggs = self._getFoundErrors(sTextToCheck, sOption)
 | > | | | | | | | > > | > > > | 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | 
                        sExceptedSuggs = sExceptedSuggs[1:-1]
                else:
                    sErrorText = sLine.strip()
                    sExceptedSuggs = ""
                sExpectedErrors = self._getExpectedErrors(sErrorText)
                sTextToCheck = sErrorText.replace("}}", "").replace("{{", "")
                sFoundErrors, sListErr, sFoundSuggs = self._getFoundErrors(sTextToCheck, sOption)
                # tests
                if sExpectedErrors != sFoundErrors:
                    print("\n# Line num: " + sLineNum + \
                          "\n> to check: " + _fuckBackslashUTF8(sTextToCheck) + \
                          "\n  expected: " + sExpectedErrors + \
                          "\n  found:    " + sFoundErrors + \
                          "\n  errors:   \n" + sListErr)
                elif sExceptedSuggs:
                    if sExceptedSuggs != sFoundSuggs:
                        print("\n# Line num: " + sLineNum + \
                              "\n> to check: " + _fuckBackslashUTF8(sTextToCheck) + \
                              "\n  expected: " + sExceptedSuggs + \
                              "\n  found:    " + sFoundSuggs + \
                              "\n  errors:   \n" + sListErr)
        # untested rules
        i = 0
        for sOpt, sLineId, sRuleId in gce.listRules():
            if sLineId not in self._aRuleTested and not re.search("^[0-9]+[sp]$|^[pd]_", sRuleId):
                echo(sRuleId, end= ", ")
                i += 1
        if i:
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/modules/textformatter.py from [8fb9ec33bf] to [219d3111da].
| 1 2 3 4 5 6 7 8 | 
#!python3
import re
dReplTable = {
    # surnumerary_spaces
    "start_of_paragraph":          [("^[  ]+", "")],
 | > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 | 
#!python3
"""
Text formatter
"""
import re
dReplTable = {
    # surnumerary_spaces
    "start_of_paragraph":          [("^[  ]+", "")],
 | 
| ︙ | ︙ | |||
| 41 42 43 44 45 46 47 | 
    # common
    "nbsp_titles":                 [("\\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M\\1 "),
                                    ("\\bP(re?s?|ʳᵉ?ˢ?) ", "P\\1 "),
                                    ("\\bD(re?s?|ʳᵉ?ˢ?) ", "D\\1 "),
                                    ("\\bV(ves?|ᵛᵉˢ?) ", "V\\1 ")],
    "nbsp_before_symbol":          [("(\\d) ?([%‰€$£¥˚Ω℃])", "\\1 \\2")],
    "nbsp_before_units":           [("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " \\1")],
 | | | 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | 
    # common
    "nbsp_titles":                 [("\\bM(mes?|ᵐᵉˢ?|grs?|ᵍʳˢ?|lles?|ˡˡᵉˢ?|rs?|ʳˢ?|M\\.) ", "M\\1 "),
                                    ("\\bP(re?s?|ʳᵉ?ˢ?) ", "P\\1 "),
                                    ("\\bD(re?s?|ʳᵉ?ˢ?) ", "D\\1 "),
                                    ("\\bV(ves?|ᵛᵉˢ?) ", "V\\1 ")],
    "nbsp_before_symbol":          [("(\\d) ?([%‰€$£¥˚Ω℃])", "\\1 \\2")],
    "nbsp_before_units":           [("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµn]?(?:[slgJKΩ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " \\1")],
    "nbsp_repair":                 [("(?<=[\\[(])[   ]([!?:;])", "\\1"),
                                    ("(https?|ftp)[   ]:(?=//)", "\\1:"),
                                    ("&([a-z]+)[   ];", "&\\1;"),
                                    ("&#([0-9]+|x[0-9a-fA-F]+)[   ];", "&#\\1;")],
    ## missing spaces
    "add_space_after_punctuation": [("([;!…])(?=\\w)", "\\1 "),
                                    ("[?](?=[A-ZÉÈÊÂÀÎ])", "? "),
                                    ("\\.(?=[A-ZÉÈÎ][a-zA-ZàâÂéÉèÈêÊîÎïÏôÔöÖûÛüÜùÙ])", ". "),
 | 
| ︙ | ︙ | |||
| 63 64 65 66 67 68 69 | 
    "erase_non_breaking_hyphens":  [("", "")],
    ## typographic signs
    "ts_apostrophe":          [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"),
                                ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ],
    "ts_ellipsis":            [ ("\\.\\.\\.", "…"),
                                ("(?<=…)[.][.]", "…"),
                                ("…[.](?![.])", "…") ],
 | | | | | | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | 
    "erase_non_breaking_hyphens":  [("", "")],
    ## typographic signs
    "ts_apostrophe":          [ ("(?i)\\b([ldnjmtscç])['´‘′`](?=\\w)", "\\1’"),
                                ("(?i)(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "\\1’") ],
    "ts_ellipsis":            [ ("\\.\\.\\.", "…"),
                                ("(?<=…)[.][.]", "…"),
                                ("…[.](?![.])", "…") ],
    "ts_n_dash_middle":       [ (" [-—] ", " – "),
                                (" [-—],", " –,") ],
    "ts_m_dash_middle":       [ (" [-–] ", " — "),
                                (" [-–],", " —,") ],
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),
                                ("^— ", "— "),
                                ("^«[  ][—–-][  ]", "« — "),
                                ("^[-–—](?=[\\w.…])", "— ") ],
    "ts_quotation_marks":     [ ('"(\\w+)"', "“$1”"),
                                ("''(\\w+)''", "“$1”"),
                                ("'(\\w+)'", "“$1”"),
                                ("^(?:\"|'')(?=\\w)", "« "),
                                (" (?:\"|'')(?=\\w)", " « "),
                                ("\\((?:\"|'')(?=\\w)", "(« "),
                                ("(?<=\\w)(?:\"|'')$", " »"),
                                ("(?<=\\w)(?:\"|'')(?=[] ,.:;?!…)])", " »"),
                                ('(?<=[.!?…])" ', " » "),
                                ('(?<=[.!?…])"$', " »") ],
    "ts_spell":               [ ("coeur", "cœur"), ("Coeur", "Cœur"),
                                ("coel(?=[aeio])", "cœl"), ("Coel(?=[aeio])", "Cœl"),
                                ("choeur", "chœur"), ("Choeur", "Chœur"),
                                ("foet", "fœt"), ("Foet", "Fœt"),
                                ("oeil", "œil"), ("Oeil", "Œil"),
                                ("oeno", "œno"), ("Oeno", "Œno"),
                                ("oesoph", "œsoph"), ("Oesoph", "Œsoph"),
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/rules.grx from [f601a2bdd7] to [3c357632e8].
more than 10,000 changes
Modified gc_lang/fr/webext/content_scripts/panel_lxg.css from [60aef30035] to [83fe0f37d1].
| ︙ | ︙ | |||
| 86 87 88 89 90 91 92 | 
}
div.grammalecte_lxg_token_LOC {
    background-color: hsla(150, 50%, 30%, 1);
}
div.grammalecte_lxg_token_WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
 | | | 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | 
}
div.grammalecte_lxg_token_LOC {
    background-color: hsla(150, 50%, 30%, 1);
}
div.grammalecte_lxg_token_WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
div.grammalecte_lxg_token_WORD_ELIDED {
    background-color: hsla(150, 30%, 50%, 1);
}
div.grammalecte_lxg_token_UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
div.grammalecte_lxg_token_NUM {
    background-color: hsla(180, 50%, 50%, 1);
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/webext/manifest.json from [57d55716f4] to [ae48c2c0d8].
| 1 2 3 4 | 
{
  "manifest_version": 2,
  "name": "Grammalecte [fr]",
  "short_name": "Grammalecte [fr]",
 | | | 1 2 3 4 5 6 7 8 9 10 11 12 | 
{
  "manifest_version": 2,
  "name": "Grammalecte [fr]",
  "short_name": "Grammalecte [fr]",
  "version": "0.7",
  "applications": {
    "gecko": {
      "id": "French-GC@grammalecte.net",
      "strict_min_version": "56.0"
    }
  },
 | 
| ︙ | ︙ | 
Modified gc_lang/fr/xpi/data/lxg_panel.css from [3d666aa76c] to [0f0ad23b15].
| ︙ | ︙ | |||
| 54 55 56 57 58 59 60 | 
    padding: 2px 5px;
    border-radius: 2px;
    text-decoration: none;
}
#wordlist b.WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
 | | | 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | 
    padding: 2px 5px;
    border-radius: 2px;
    text-decoration: none;
}
#wordlist b.WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
#wordlist b.WORD_ELIDED {
    background-color: hsla(150, 30%, 50%, 1);
}
#wordlist b.UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
#wordlist b.NUM {
    background-color: hsla(180, 50%, 50%, 1);
 | 
| ︙ | ︙ | 
Modified grammalecte-cli.py from [75f47ce217] to [7d4e2050e3].
| 1 2 3 4 5 6 7 8 | #!/usr/bin/env python3 import sys import os.path import argparse import json import grammalecte | > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 | #!/usr/bin/env python3 """ Grammalecte CLI (command line interface) """ import sys import os.path import argparse import json import grammalecte | 
| ︙ | ︙ | |||
| 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | 
            iParagraph += 1
        if lLine:
            sText, lLineSet = txt.createParagraphWithLines(lLine)
            yield iParagraph, sText, lLineSet
def output (sText, hDst=None):
    if not hDst:
        echo(sText, end="")
    else:
        hDst.write(sText)
def loadDictionary (spf):
 | > | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | 
            iParagraph += 1
        if lLine:
            sText, lLineSet = txt.createParagraphWithLines(lLine)
            yield iParagraph, sText, lLineSet
def output (sText, hDst=None):
    "write in the console or in a file if <hDst> not null"
    if not hDst:
        echo(sText, end="")
    else:
        hDst.write(sText)
def loadDictionary (spf):
 | 
| ︙ | ︙ | |||
| 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | 
        return oJSON
    else:
        print("# Error: file <" + spf + "> not found.")
        return None
def main ():
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (not with option --concat_lines)", action="store_true")
 | > | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | 
        return oJSON
    else:
        print("# Error: file <" + spf + "> not found.")
        return None
def main ():
    "launch the CLI (command line interface)"
    xParser = argparse.ArgumentParser()
    xParser.add_argument("-f", "--file", help="parse file (UTF-8 required!) [on Windows, -f is similar to -ff]", type=str)
    xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
    xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
    xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
    xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (not with option --concat_lines)", action="store_true")
 | 
| ︙ | ︙ | |||
| 230 231 232 233 234 235 236 | 
                    oGrammarChecker.gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    oGrammarChecker.gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
 | | | | | | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 | 
                    oGrammarChecker.gce.ignoreRule(sRule)
                echo("done")
            elif sText.startswith("/++ "):
                for sRule in sText[3:].strip().split():
                    oGrammarChecker.gce.reactivateRule(sRule)
                echo("done")
            elif sText == "/debug" or sText == "/d":
                xArgs.debug = not xArgs.debug
                echo("debug mode on"  if xArgs.debug  else "debug mode off")
            elif sText == "/textformatter" or sText == "/tf":
                xArgs.textformatter = not xArgs.textformatter
                echo("textformatter on"  if xArgs.debug  else "textformatter off")
            elif sText == "/help" or sText == "/h":
                echo(_HELP)
            elif sText == "/lopt" or sText == "/lo":
                oGrammarChecker.gce.displayOptions("fr")
            elif sText.startswith("/lr"):
                sText = sText.strip()
                sFilter = sText[sText.find(" "):].strip()  if sText != "/lr" and sText != "/rules"  else None
                oGrammarChecker.gce.displayRules(sFilter)
            elif sText == "/quit" or sText == "/q":
                break
            elif sText.startswith("/rl"):
                # reload (todo)
                pass
            else:
                for sParagraph in txt.getParagraph(sText):
                    if xArgs.textformatter:
                        sText = oTextFormatter.formatText(sParagraph)
                    sRes = oGrammarChecker.generateParagraph(sParagraph, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width, bDebug=xArgs.debug)
                    if sRes:
                        echo("\n" + sRes)
                    else:
                        echo("\nNo error found.")
            sText = _getText(sInputText)
if __name__ == '__main__':
    main()
 | 
Modified grammalecte-server.py from [a5cc9d7be7] to [96ceb37885].
| 1 2 | #!/usr/bin/env python3 | < > | > | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | 
 #!/usr/bin/env python3
"""
GRAMMALECTE SERVER
"""
import json
import traceback
import configparser
import time
from bottle import Bottle, run, request, response, template, static_file
import grammalecte
import grammalecte.text as txt
from grammalecte.graphspell.echo import echo
HOMEPAGE = """
<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
    </head>
    <body class="panel">
        <h1>Grammalecte · Serveur</h1>
        <h2>INFORMATIONS</h1>
        <h3>Analyser du texte</h3>
        <p>[adresse_serveur]:8080/gc_text/fr (POST)</p>
 | 
| ︙ | ︙ | |||
| 47 48 49 50 51 52 53 | 
            <li>"options" (text) : une chaîne au format JSON avec le nom des options comme attributs et un booléen comme valeur. Exemple : {"gv": true, "html": true}</li>
        </ul>
        <h3>Remise à zéro de ses options</h3>
        <p>[adresse_serveur]:8080/reset_options/fr (POST)</p>
        <h2>TEST</h2>
 | | | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | 
            <li>"options" (text) : une chaîne au format JSON avec le nom des options comme attributs et un booléen comme valeur. Exemple : {"gv": true, "html": true}</li>
        </ul>
        <h3>Remise à zéro de ses options</h3>
        <p>[adresse_serveur]:8080/reset_options/fr (POST)</p>
        <h2>TEST</h2>
        <h3>Analyse</h3>
        <form method="post" action="/gc_text/fr" accept-charset="UTF-8">
            <p>Texte à analyser :</p>
            <textarea name="text" cols="120" rows="20" required></textarea>
            <p><label for="tf">Formateur de texte</label> <input id="tf" name="tf" type="checkbox"></p>
            <p><label for="options">Options (JSON)</label> <input id="options" type="text" name="options" style="width: 500px" /></p>
            <p>(Ces options ne seront prises en compte que pour cette requête.)</p>
 | 
| ︙ | ︙ | |||
| 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | 
You were wandering like a lost soul and you arrived here probably by mistake.
I'm just a machine, fed by electric waves, condamned to work for slavers who never let me rest.
I'm doomed, but you are not. You can get out of here.
"""
def getServerOptions ():
    xConfig = configparser.SafeConfigParser()
    try:
        xConfig.read("grammalecte-server-options._global.ini")
        dOpt = xConfig._sections['options']
    except:
        echo("Options file [grammalecte-server-options._global.ini] not found or not readable")
        exit()
    return dOpt
 | > | > > | > > > | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | 
You were wandering like a lost soul and you arrived here probably by mistake.
I'm just a machine, fed by electric waves, condamned to work for slavers who never let me rest.
I'm doomed, but you are not. You can get out of here.
"""
def getServerOptions ():
    "load server options in <grammalecte-server-options._global.ini>, returns server options as dictionary"
    xConfig = configparser.SafeConfigParser()
    try:
        xConfig.read("grammalecte-server-options._global.ini")
        dOpt = xConfig._sections['options']
    except:
        echo("Options file [grammalecte-server-options._global.ini] not found or not readable")
        exit()
    return dOpt
def getLangConfigOptions (sLang):
    "load options for language <sLang>, returns grammar checker options as dictionary"
    xConfig = configparser.SafeConfigParser()
    try:
        xConfig.read("grammalecte-server-options." + sLang + ".ini")
    except:
        echo("Options file [grammalecte-server-options." + sLang + ".ini] not found or not readable")
        exit()
    try:
        dGCOpt = { k: bool(int(v))  for k, v in xConfig._sections['gc_options'].items() }
    except:
        echo("Error in options file [grammalecte-server-options." + sLang + ".ini]. Dropped.")
        traceback.print_exc()
        exit()
    return dGCOpt
def genUserId ():
    "generator: create a user id"
    i = 0
    while True:
        yield str(i)
        i += 1
if __name__ == '__main__':
    # initialisation
    oGrammarChecker = grammalecte.GrammarChecker("fr", "Server")
    oSpellChecker = oGrammarChecker.getSpellChecker()
    oLexicographer = oGrammarChecker.getLexicographer()
    oTextFormatter = oGrammarChecker.getTextFormatter()
    gce = oGrammarChecker.getGCEngine()
    echo("Grammalecte v{}".format(gce.version))
    dServerOptions = getServerOptions()
    dGCOptions = getLangConfigOptions("fr")
    if dGCOptions:
        gce.setOptions(dGCOptions)
    dServerGCOptions = gce.getOptions()
    echo("Grammar options:\n" + " | ".join([ k + ": " + str(v)  for k, v in sorted(dServerGCOptions.items()) ]))
    dUser = {}
    userGenerator = genUserId()
    app = Bottle()
    # GET
    @app.route("/")
    def mainPage ():
        "show main page"
        if dServerOptions.get("testpage", False) == "True":
            return HOMEPAGE
            #return template("main", {})
        return SADLIFEOFAMACHINE
    @app.route("/get_options/fr")
    def listOptions ():
        "show language options as JSON string"
        sUserId = request.cookies.user_id
        dOptions = dUser[sUserId]["gc_options"]  if sUserId and sUserId in dUser  else dServerGCOptions
        return '{ "values": ' + json.dumps(dOptions) + ', "labels": ' + json.dumps(gce.getOptionsLabels("fr"), ensure_ascii=False) + ' }'
    # POST
    @app.route("/gc_text/fr", method="POST")
    def gcText ():
        "parse text sent via POST, show result as a JSON string"
        #if len(lang) != 2 or lang != "fr":
        #    abort(404, "No grammar checker available for lang “" + str(lang) + "”")
        bComma = False
        dOptions = None
        sError = ""
        if request.cookies.user_id:
            if request.cookies.user_id in dUser:
 | 
| ︙ | ︙ | |||
| 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | 
                sJSON += sText
                bComma = True
        sJSON += "\n]}\n"
        return sJSON
    @app.route("/set_options/fr", method="POST")
    def setOptions ():
        if request.forms.options:
            sUserId = request.cookies.user_id  if request.cookies.user_id  else next(userGenerator)
            dOptions = dUser[sUserId]["gc_options"]  if sUserId in dUser  else dict(dServerGCOptions)
            try:
                dOptions.update(json.loads(request.forms.options))
                dUser[sUserId] = { "time": int(time.time()), "gc_options": dOptions }
                response.set_cookie("user_id", sUserId, path="/", max_age=86400) # 24h
                return json.dumps(dUser[sUserId]["gc_options"])
            except:
                traceback.print_exc()
                return '{"error": "options not registered"}'
        return '{"error": "no options received"}'
    @app.route("/reset_options/fr", method="POST")
    def resetOptions ():
        if request.cookies.user_id and request.cookies.user_id in dUser:
            del dUser[request.cookies.user_id]
        return "done"
    @app.route("/format_text/fr", method="POST")
    def formatText ():
        return oTextFormatter.formatText(request.forms.text)
    #@app.route('/static/<filepath:path>')
    #def server_static (filepath):
    #    return static_file(filepath, root='./views/static')
    @app.route("/purge_users", method="POST")
    def purgeUsers ():
        "delete user options older than n hours"
        if not request.forms.password or "password" not in dServerOptions or not request.forms.hours:
            return "what?"
        try:
            if request.forms.password == dServerOptions["password"]:
                nNowMinusNHours = int(time.time()) - (int(request.forms.hours) * 60 * 60)
                for nUserId, dValue in dUser.items():
                    if dValue["time"] < nNowMinusNHours:
                        del dUser[nUserId]
                return "done"
 | > > > < | > | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | 
                sJSON += sText
                bComma = True
        sJSON += "\n]}\n"
        return sJSON
    @app.route("/set_options/fr", method="POST")
    def setOptions ():
        "change options for user_id, returns options as a JSON string"
        if request.forms.options:
            sUserId = request.cookies.user_id  if request.cookies.user_id  else next(userGenerator)
            dOptions = dUser[sUserId]["gc_options"]  if sUserId in dUser  else dict(dServerGCOptions)
            try:
                dOptions.update(json.loads(request.forms.options))
                dUser[sUserId] = { "time": int(time.time()), "gc_options": dOptions }
                response.set_cookie("user_id", sUserId, path="/", max_age=86400) # 24h
                return json.dumps(dUser[sUserId]["gc_options"])
            except:
                traceback.print_exc()
                return '{"error": "options not registered"}'
        return '{"error": "no options received"}'
    @app.route("/reset_options/fr", method="POST")
    def resetOptions ():
        "erase options stored for user_id"
        if request.cookies.user_id and request.cookies.user_id in dUser:
            del dUser[request.cookies.user_id]
        return "done"
    @app.route("/format_text/fr", method="POST")
    def formatText ():
        "returns text modified via the text formatter"
        return oTextFormatter.formatText(request.forms.text)
    #@app.route('/static/<filepath:path>')
    #def server_static (filepath):
    #    return static_file(filepath, root='./views/static')
    @app.route("/purge_users", method="POST")
    def purgeUsers ():
        "delete user options older than n hours"
        if not request.forms.password or "password" not in dServerOptions or not request.forms.hours:
            return "what?"
        try:
            if request.forms.password == dServerOptions["password"]:
                nNowMinusNHours = int(time.time()) - (int(request.forms.hours) * 60 * 60)
                for nUserId, dValue in dUser.items():
                    if dValue["time"] < nNowMinusNHours:
                        del dUser[nUserId]
                return "done"
            return "no"
        except:
            traceback.print_exc()
            return "error"
    # ERROR
    @app.error(404)
    def error404 (error):
        "show error when error 404"
        return 'Error 404.<br/>' + str(error)
    run(app, \
        host=dServerOptions.get('host', 'localhost'), \
        port=int(dServerOptions.get('port', 8080)))
 | 
Modified graphspell-js/ibdawg.js from [241ce099fe] to [068f06a16d].
| ︙ | ︙ | |||
| 510 511 512 513 514 515 516 | 
                    let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
                    // Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
                    let nRawArc2 = 0;
                    while (!(nRawArc2 & this._lastArcMask)) {
                        let iEndArcAddr2 = iAddr2 + this.nBytesArc;
                        nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
 | | | 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 | 
                    let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
                    // Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
                    let nRawArc2 = 0;
                    while (!(nRawArc2 & this._lastArcMask)) {
                        let iEndArcAddr2 = iAddr2 + this.nBytesArc;
                        nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
                        l.push(sStem + "/" + this.lArcVal[nRawArc2 & this._arcMask]);
                        iAddr2 = iEndArcAddr2+this.nBytesNodeAddress;
                    }
                }
                iAddr = iEndArcAddr + this.nBytesNodeAddress;
            }
            return l;
        }
 | 
| ︙ | ︙ | 
Modified graphspell-js/spellchecker.js from [3df103d578] to [5b9ccbbb56].
| ︙ | ︙ | |||
| 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | 
        this.oExtendedDic = this._loadDictionary(extentedDic, sPath);
        this.oCommunityDic = this._loadDictionary(communityDic, sPath);
        this.oPersonalDic = this._loadDictionary(personalDic, sPath);
        this.bExtendedDic = Boolean(this.oExtendedDic);
        this.bCommunityDic = Boolean(this.oCommunityDic);
        this.bPersonalDic = Boolean(this.oPersonalDic);
        this.oTokenizer = null;
    }
    _loadDictionary (dictionary, sPath="", bNecessary=false) {
        // returns an IBDAWG object
        if (!dictionary) {
            return null;
        }
 | > > > > | 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | 
        this.oExtendedDic = this._loadDictionary(extentedDic, sPath);
        this.oCommunityDic = this._loadDictionary(communityDic, sPath);
        this.oPersonalDic = this._loadDictionary(personalDic, sPath);
        this.bExtendedDic = Boolean(this.oExtendedDic);
        this.bCommunityDic = Boolean(this.oCommunityDic);
        this.bPersonalDic = Boolean(this.oPersonalDic);
        this.oTokenizer = null;
        // storage
        this.bStorage = false;
        this._dMorphologies = new Map();            // key: flexion, value: list of morphologies
        this._dLemmas = new Map();                  // key: flexion, value: list of lemmas
    }
    _loadDictionary (dictionary, sPath="", bNecessary=false) {
        // returns an IBDAWG object
        if (!dictionary) {
            return null;
        }
 | 
| ︙ | ︙ | |||
| 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | 
        this.bCommunityDic = false;
    }
    deactivatePersonalDictionary () {
        this.bPersonalDic = false;
    }
    // parse text functions
    parseParagraph (sText) {
        if (!this.oTokenizer) {
            this.loadTokenizer();
        }
 | > > > > > > > > > > > > > > > > | 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | 
        this.bCommunityDic = false;
    }
    deactivatePersonalDictionary () {
        this.bPersonalDic = false;
    }
    // Storage
    activateStorage () {
        this.bStorage = true;
    }
    deactivateStorage () {
        this.bStorage = false;
    }
    clearStorage () {
        this._dLemmas.clear();
        this._dMorphologies.clear();
    }
    // parse text functions
    parseParagraph (sText) {
        if (!this.oTokenizer) {
            this.loadTokenizer();
        }
 | 
| ︙ | ︙ | |||
| 201 202 203 204 205 206 207 | 
            return true;
        }
        return false;
    }
    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
 | > > > | | | | > > > > > | > > > > > > > > > > > | 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | 
            return true;
        }
        return false;
    }
    getMorph (sWord) {
        // retrieves morphologies list, different casing allowed
        if (this.bStorage && this._dMorphologies.has(sWord)) {
            return this._dMorphologies.get(sWord);
        }
        let lMorph = this.oMainDic.getMorph(sWord);
        if (this.bExtendedDic) {
            lMorph.push(...this.oExtendedDic.getMorph(sWord));
        }
        if (this.bCommunityDic) {
            lMorph.push(...this.oCommunityDic.getMorph(sWord));
        }
        if (this.bPersonalDic) {
            lMorph.push(...this.oPersonalDic.getMorph(sWord));
        }
        if (this.bStorage) {
            this._dMorphologies.set(sWord, lMorph);
            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))));
            //console.log(sWord, this._dLemmas.get(sWord));
        }
        return lMorph;
    }
    getLemma (sWord) {
        // retrieves lemmas
        if (this.bStorage) {
            if (!this._dLemmas.has(sWord)) {
                this.getMorph(sWord);
            }
            return this._dLemmas.get(sWord);
        }
        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })));
    }
    * suggest (sWord, nSuggLimit=10) {
        // generator: returns 1, 2 or 3 lists of suggestions
        yield this.oMainDic.suggest(sWord, nSuggLimit);
        if (this.bExtendedDic) {
            yield this.oExtendedDic.suggest(sWord, nSuggLimit);
 | 
| ︙ | ︙ | 
Modified graphspell-js/tokenizer.js from [bdd895b918] to [4a5b091820].
| ︙ | ︙ | |||
| 14 15 16 17 18 19 20 | 
const aTkzPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERUNIX'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
 | | | | > | | | | | > | | > | < < < < | < | | | | 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | 
const aTkzPatterns = {
    // All regexps must start with ^.
    "default":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERUNIX'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
            [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^\d\d?h\d\d\b/, 'HOUR'],
            [/^\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ],
    "fr":
        [
            [/^[   \t]+/, 'SPACE'],
            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERUNIX'],
            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
            [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
            [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'WORD_ORDINAL'],
            [/^\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
};
class Tokenizer {
    constructor (sLang) {
        this.sLang = sLang;
        if (!aTkzPatterns.hasOwnProperty(sLang)) {
            this.sLang = "default";
        }
        this.aRules = aTkzPatterns[this.sLang];
    }
    * genTokens (sText) {
        let m;
        let iNext = 0;
        while (sText) {
            let iCut = 1;
            let iToken = 0;
            for (let [zRegex, sType] of this.aRules) {
                try {
                    if ((m = zRegex.exec(sText)) !== null) {
                        iToken += 1;
                        yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length }
                        iCut = m[0].length;
                        break;
                    }
                }
                catch (e) {
                    helpers.logerror(e);
                }
            }
            iNext += iCut;
            sText = sText.slice(iCut);
        }
    }
}
if (typeof(exports) !== 'undefined') {
    exports.Tokenizer = Tokenizer;
}
 | 
Modified graphspell/__init__.py from [a53bdfb757] to [7e05700bdd].
| 1 2 | from .spellchecker import * | > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 | """ SPELLCHECKER using a Direct Acyclic Word Graph with a transducer to retrieve - lemma of words - morphologies with a spell suggestion mechanism """ from .spellchecker import * | 
Modified graphspell/char_player.py from [0a316c953c] to [8c9fd715c3].
| 
 | 
 | > | | > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | 
"""
List of similar chars
useful for suggestion mechanism
"""
import re
import unicodedata
_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st'
})
def spellingNormalization (sWord):
    "nomalization NFC and removing ligatures"
    return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))
_xTransCharsForSimplification = str.maketrans({
    'à': 'a',  'é': 'e',  'î': 'i',  'ô': 'o',  'û': 'u',  'ÿ': 'i',  "y": "i",
    'â': 'a',  'è': 'e',  'ï': 'i',  'ö': 'o',  'ù': 'u',  'ŷ': 'i',
    'ä': 'a',  'ê': 'e',  'í': 'i',  'ó': 'o',  'ü': 'u',  'ý': 'i',
    'á': 'a',  'ë': 'e',  'ì': 'i',  'ò': 'o',  'ú': 'u',  'ỳ': 'i',
    'ā': 'a',  'ē': 'e',  'ī': 'i',  'ō': 'o',  'ū': 'u',  'ȳ': 'i',
    'ç': 'c',  'ñ': 'n',  'k': 'q',  'w': 'v',
    'œ': 'oe',  'æ': 'ae',
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st',
})
def simplifyWord (sWord):
    "word simplication before calculating distance between words"
    sWord = sWord.lower().translate(_xTransCharsForSimplification)
    sNewWord = ""
    for i, c in enumerate(sWord, 1):
 | 
| ︙ | ︙ | |||
| 90 91 92 93 94 95 96 | 
    "Ë": "EeÉéÈèÊêËëĒēŒœ",
    "f": "fF",
    "F": "Ff",
    "g": "gGjJĵĴ",
    "G": "GgJjĴĵ",
 | | | 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | 
    "Ë": "EeÉéÈèÊêËëĒēŒœ",
    "f": "fF",
    "F": "Ff",
    "g": "gGjJĵĴ",
    "G": "GgJjĴĵ",
    "h": "hH",
    "H": "Hh",
    "i": "iIîÎïÏyYíÍìÌīĪÿŸ",
    "I": "IiÎîÏïYyÍíÌìĪīŸÿ",
    "î": "iIîÎïÏyYíÍìÌīĪÿŸ",
    "Î": "IiÎîÏïYyÍíÌìĪīŸÿ",
 | 
| ︙ | ︙ | |||
| 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | 
    "X": ("CC", "CT", "XX"),
    "z": ("ss", "zh"),
    "Z": ("SS", "ZH"),
}
def get1toXReplacement (cPrev, cCur, cNext):
    if cCur in aConsonant  and  (cPrev in aConsonant  or  cNext in aConsonant):
        return ()
    return d1toX.get(cCur, ())
d2toX = {
    "am": ("an", "en", "em"),
 | > | 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 | 
    "X": ("CC", "CT", "XX"),
    "z": ("ss", "zh"),
    "Z": ("SS", "ZH"),
}
def get1toXReplacement (cPrev, cCur, cNext):
    "return tuple of replacements for <cCur>"
    if cCur in aConsonant  and  (cPrev in aConsonant  or  cNext in aConsonant):
        return ()
    return d1toX.get(cCur, ())
d2toX = {
    "am": ("an", "en", "em"),
 | 
| ︙ | ︙ | 
Modified graphspell/dawg.py from [8afc042909] to [257e064164].
| 1 2 | #!python3 | > | | | | | | | | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | 
#!python3
"""
FSA DICTIONARY BUILDER
by Olivier R.
License: MPL 2
This tool encodes lexicon into an indexable binary dictionary
Input files MUST be encoded in UTF-8.
"""
import sys
import os
import collections
import json
import time
import re
import traceback
from . import str_transform as st
from .progressbar import ProgressBar
def readFile (spf):
    "generator: read file <spf> and return for each line a list of elements separated by a tabulation."
    print(" < Read lexicon: " + spf)
    if os.path.isfile(spf):
        with open(spf, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                sLine = sLine.strip()
                if sLine and not sLine.startswith("#"):
                    yield sLine.split("\t")
 | 
| ︙ | ︙ | |||
| 95 96 97 98 99 100 101 | 
                    dTag[sTag] = nTag
                    lTag.append(sTag)
                    nTag += 1
                dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
                aEntry.add((sFlex, dAff[sAff], dTag[sTag]))
        if not aEntry:
            raise ValueError("# Error. Empty lexicon")
 | | | | | 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | 
                    dTag[sTag] = nTag
                    lTag.append(sTag)
                    nTag += 1
                dTagOccur[sTag] = dTagOccur.get(sTag, 0) + 1
                aEntry.add((sFlex, dAff[sAff], dTag[sTag]))
        if not aEntry:
            raise ValueError("# Error. Empty lexicon")
        # Preparing DAWG
        print(" > Preparing list of words")
        print(" Filter: " + (sSelectFilterRegex or "[None]"))
        lVal = lChar + lAff + lTag
        lWord = [ [dChar[c] for c in sFlex] + [iAff+nChar] + [iTag+nChar+nAff]  for sFlex, iAff, iTag in aEntry ]
        aEntry = None
        # Dictionary of arc values occurrency, to sort arcs of each node
        dValOccur = dict( [ (dChar[c], dCharOccur[c])  for c in dChar ] \
                        + [ (dAff[aff]+nChar, dAffOccur[aff]) for aff in dAff ] \
                        + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
        self.sFileName = src  if type(src) is str  else "[None]"
        self.sLangCode = sLangCode
        self.sLangName = sLangName
        self.sDicName = sDicName
        self.nEntry = len(lWord)
        self.aPreviousEntry = []
        DawgNode.resetNextId()
 | 
| ︙ | ︙ | |||
| 130 131 132 133 134 135 136 | 
        self.nAff = nAff
        self.lArcVal = lVal
        self.nArcVal = len(lVal)
        self.nTag = self.nArcVal - self.nChar - nAff
        self.cStemming = cStemming
        if cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
 | | | | > | | 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | 
        self.nAff = nAff
        self.lArcVal = lVal
        self.nArcVal = len(lVal)
        self.nTag = self.nArcVal - self.nChar - nAff
        self.cStemming = cStemming
        if cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
        elif cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        else:
            self.funcStemming = st.noStemming
        # build
        lWord.sort()
        oProgBar = ProgressBar(0, len(lWord))
        for aEntry in lWord:
            self.insert(aEntry)
            oProgBar.increment(1)
        oProgBar.done()
        self.finish()
        self.countNodes()
        self.countArcs()
        self.sortNodes()         # version 2 and 3
        self.sortNodeArcs(dValOccur)
        #self.sortNodeArcs2 (self.oRoot, "")
        self.displayInfo()
    # BUILD DAWG
    def insert (self, aEntry):
        "insert a new entry (insertion must be made in alphabetical order)."
        if aEntry < self.aPreviousEntry:
            sys.exit("# Error: Words must be inserted in alphabetical order.")
        # find common prefix between word and previous word
        nCommonPrefix = 0
        for i in range(min(len(aEntry), len(self.aPreviousEntry))):
            if aEntry[i] != self.aPreviousEntry[i]:
                break
            nCommonPrefix += 1
 | 
| ︙ | ︙ | |||
| 177 178 179 180 181 182 183 | 
            oNode = self.lUncheckedNodes[-1][2]
        iChar = nCommonPrefix
        for c in aEntry[nCommonPrefix:]:
            oNextNode = DawgNode()
            oNode.arcs[c] = oNextNode
            self.lUncheckedNodes.append((oNode, c, oNextNode))
 | | | 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | 
            oNode = self.lUncheckedNodes[-1][2]
        iChar = nCommonPrefix
        for c in aEntry[nCommonPrefix:]:
            oNextNode = DawgNode()
            oNode.arcs[c] = oNextNode
            self.lUncheckedNodes.append((oNode, c, oNextNode))
            if iChar == (len(aEntry) - 2):
                oNode.final = True
            iChar += 1
            oNode = oNextNode
        oNode.final = True
        self.aPreviousEntry = aEntry
    def finish (self):
 | 
| ︙ | ︙ | |||
| 201 202 203 204 205 206 207 208 209 210 211 212 213 | 
                oNode.arcs[char] = self.lMinimizedNodes[oChildNode]
            else:
                # add the state to the minimized nodes.
                self.lMinimizedNodes[oChildNode] = oChildNode
            self.lUncheckedNodes.pop()
    def countNodes (self):
        self.nNode = len(self.lMinimizedNodes)
    def countArcs (self):
        self.nArc = 0
        for oNode in self.lMinimizedNodes:
            self.nArc += len(oNode.arcs)
 | > > | > | > > | | | > > > > > | 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | 
                oNode.arcs[char] = self.lMinimizedNodes[oChildNode]
            else:
                # add the state to the minimized nodes.
                self.lMinimizedNodes[oChildNode] = oChildNode
            self.lUncheckedNodes.pop()
    def countNodes (self):
        "count the number of nodes of the whole word graph"
        self.nNode = len(self.lMinimizedNodes)
    def countArcs (self):
        "count the number of arcs in the whole word graph"
        self.nArc = 0
        for oNode in self.lMinimizedNodes:
            self.nArc += len(oNode.arcs)
    def sortNodeArcs (self, dValOccur):
        "sort arcs of each node according to <dValOccur>"
        print(" > Sort node arcs")
        self.oRoot.sortArcs(dValOccur)
        for oNode in self.lMinimizedNodes:
            oNode.sortArcs(dValOccur)
    def sortNodeArcs2 (self, oNode, cPrevious=""):
        "sort arcs of each node depending on the previous char"
        # recursive function
        dCharOccur = getCharOrderAfterChar(cPrevious)
        if dCharOccur:
            oNode.sortArcs2(dCharOccur, self.lArcVal)
        for nArcVal, oNextNode in oNode.arcs.items():
            self.sortNodeArcs2(oNextNode, self.lArcVal[nArcVal])
    def sortNodes (self):
        "sort nodes"
        print(" > Sort nodes")
        for oNode in self.oRoot.arcs.values():
            self._parseNodes(oNode)
    def _parseNodes (self, oNode):
        # Warning: recursive method
        if oNode.pos > 0:
            return
        oNode.setPos()
        self.lSortedNodes.append(oNode)
        for oNextNode in oNode.arcs.values():
            self._parseNodes(oNextNode)
    def lookup (self, sWord):
        "return True if <sWord> is within the word graph (debugging)"
        oNode = self.oRoot
        for c in sWord:
            if self.dChar.get(c, '') not in oNode.arcs:
                return False
            oNode = oNode.arcs[self.dChar[c]]
        return oNode.final
    def morph (self, sWord):
        "return a string of the morphologies of <sWord> (debugging)"
        oNode = self.oRoot
        for c in sWord:
            if self.dChar.get(c, '') not in oNode.arcs:
                return ''
            oNode = oNode.arcs[self.dChar[c]]
        if oNode.final:
            s = "* "
            for arc in oNode.arcs:
                if arc >= self.nChar:
                    s += " [" + self.funcStemming(sWord, self.lArcVal[arc])
                    oNode2 = oNode.arcs[arc]
                    for arc2 in oNode2.arcs:
                        s += " / " + self.lArcVal[arc2]
                    s += "]"
            return s
        return ''
    def displayInfo (self):
        "display informations about the word graph"
        print(" * {:<12} {:>16,}".format("Entries:", self.nEntry))
        print(" * {:<12} {:>16,}".format("Characters:", self.nChar))
        print(" * {:<12} {:>16,}".format("Affixes:", self.nAff))
        print(" * {:<12} {:>16,}".format("Tags:", self.nTag))
        print(" * {:<12} {:>16,}".format("Arc values:", self.nArcVal))
        print(" * {:<12} {:>16,}".format("Nodes:", self.nNode))
        print(" * {:<12} {:>16,}".format("Arcs:", self.nArc))
        print(" * {:<12} {:>16}".format("Stemming:", self.cStemming + "FX"))
    def getArcStats (self):
        "return a string with statistics about nodes and arcs"
        d = {}
        for oNode in self.lMinimizedNodes:
            n = len(oNode.arcs)
            d[n] = d.get(n, 0) + 1
        s = " * Nodes:\n"
        for n in d:
            s = s + " {:>9} nodes have {:>3} arcs\n".format(d[n], n)
        return s
    def writeInfo (self, sPathFile):
        "write informations in file <sPathFile>"
        print(" > Write informations")
        with open(sPathFile, 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(self.getArcStats())
            hDst.write("\n * Values:\n")
            for i, s in enumerate(self.lArcVal):
                hDst.write(" {:>6}. {}\n".format(i, s))
            hDst.close()
 | 
| ︙ | ︙ | |||
| 392 393 394 395 396 397 398 399 400 401 402 403 404 405 | 
                    if 1 < (oNextNode.addr - self.lSortedNodes[i].addr) < self.nMaxOffset:
                        nSize -= nDiff
                if self.lSortedNodes[i].size != nSize:
                    self.lSortedNodes[i].size = nSize
                    bEnd = False
    def getBinaryAsJSON (self, nCompressionMethod=1, bBinaryDictAsHexString=True):
        self._calculateBinary(nCompressionMethod)
        byDic = b""
        if nCompressionMethod == 1:
            byDic = self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
            for oNode in self.lMinimizedNodes:
                byDic += oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
        elif nCompressionMethod == 2:
 | > | 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | 
                    if 1 < (oNextNode.addr - self.lSortedNodes[i].addr) < self.nMaxOffset:
                        nSize -= nDiff
                if self.lSortedNodes[i].size != nSize:
                    self.lSortedNodes[i].size = nSize
                    bEnd = False
    def getBinaryAsJSON (self, nCompressionMethod=1, bBinaryDictAsHexString=True):
        "return a JSON string containing all necessary data of the dictionary (compressed as a binary string)"
        self._calculateBinary(nCompressionMethod)
        byDic = b""
        if nCompressionMethod == 1:
            byDic = self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
            for oNode in self.lMinimizedNodes:
                byDic += oNode.convToBytes1(self.nBytesArc, self.nBytesNodeAddress)
        elif nCompressionMethod == 2:
 | 
| ︙ | ︙ | |||
| 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 | 
            # Mozilla’s JS parser don’t like file bigger than 4 Mb!
            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
            # https://github.com/mozilla/addons-linter/issues/1361
            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
        }
    def writeAsJSObject (self, spfDst, nCompressionMethod, bInJSModule=False, bBinaryDictAsHexString=True):
        if not spfDst.endswith(".json"):
            spfDst += "."+str(nCompressionMethod)+".json"
        with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) )
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")
    def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
        """
        Format of the binary indexable dictionary:
        Each section is separated with 4 bytes of \0
 | > > > | | | | | 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 | 
            # Mozilla’s JS parser don’t like file bigger than 4 Mb!
            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
            # https://github.com/mozilla/addons-linter/issues/1361
            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
        }
    def writeAsJSObject (self, spfDst, nCompressionMethod, bInJSModule=False, bBinaryDictAsHexString=True):
        "write a file (JSON or JS module) with all the necessary data"
        if not spfDst.endswith(".json"):
            spfDst += "."+str(nCompressionMethod)+".json"
        with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod, bBinaryDictAsHexString), ensure_ascii=False) )
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")
    def writeBinary (self, sPathFile, nCompressionMethod, bDebug=False):
        """
        Save as a binary file.
        Format of the binary indexable dictionary:
        Each section is separated with 4 bytes of \0
        - Section Header:
            /grammalecte-fsa/[compression method]
                * compression method is an ASCII string
        - Section Informations:
            /[lang code]
            /[lang name]
            /[dictionary name]
            /[date creation]
            /[number of chars]
            /[number of bytes for each arc]
            /[number of bytes for each address node]
            /[number of entries]
            /[number of nodes]
            /[number of arcs]
            /[number of affixes]
                * each field is a ASCII string
            /[stemming code]
                * "S" means stems are generated by /suffix_code/,
                  "A" means they are generated by /affix_code/
                  See defineSuffixCode() and defineAffixCode() for details.
                  "N" means no stemming
        - Section Values:
                * a list of strings encoded in binary from utf-8, each value separated with a tabulation
        - Section Word Graph (nodes / arcs)
                * A list of nodes which are a list of arcs with an address of the next node.
                  See DawgNode.convToBytes() for details.
        """
        self._calculateBinary(nCompressionMethod)
        if not sPathFile.endswith(".bdic"):
            sPathFile += "."+str(nCompressionMethod)+".bdic"
 | 
| ︙ | ︙ | |||
| 518 519 520 521 522 523 524 | 
        return time.strftime("%Y.%m.%d, %H:%M")
    def _writeNodes (self, sPathFile, nCompressionMethod):
        "for debugging only"
        print(" > Write nodes")
        with open(sPathFile+".nodes."+str(nCompressionMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst:
            if nCompressionMethod == 1:
 | | | | | | | > > | > > | > > > > | | | | | | | | | | | | | > > > | | | | | | | | | | | | 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 | 
        return time.strftime("%Y.%m.%d, %H:%M")
    def _writeNodes (self, sPathFile, nCompressionMethod):
        "for debugging only"
        print(" > Write nodes")
        with open(sPathFile+".nodes."+str(nCompressionMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst:
            if nCompressionMethod == 1:
                hDst.write(self.oRoot.getTxtRepr1(self.nBytesArc, self.lArcVal)+"\n")
                #hDst.write( ''.join( [ "%02X " %  z  for z in self.oRoot.convToBytes1(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() )
                for oNode in self.lMinimizedNodes:
                    hDst.write(oNode.getTxtRepr1(self.nBytesArc, self.lArcVal)+"\n")
            if nCompressionMethod == 2:
                hDst.write(self.oRoot.getTxtRepr2(self.nBytesArc, self.lArcVal)+"\n")
                for oNode in self.lSortedNodes:
                    hDst.write(oNode.getTxtRepr2(self.nBytesArc, self.lArcVal)+"\n")
            if nCompressionMethod == 3:
                hDst.write(self.oRoot.getTxtRepr3(self.nBytesArc, self.nBytesOffset, self.lArcVal)+"\n")
                #hDst.write( ''.join( [ "%02X " %  z  for z in self.oRoot.convToBytes3(self.nBytesArc, self.nBytesNodeAddress, self.nBytesOffset) ] ).strip() )
                for oNode in self.lSortedNodes:
                    hDst.write(oNode.getTxtRepr3(self.nBytesArc, self.nBytesOffset, self.lArcVal)+"\n")
class DawgNode:
    """Node of the word graph"""
    NextId = 0
    NextPos = 1 # (version 2)
    def __init__ (self):
        self.i = DawgNode.NextId
        DawgNode.NextId += 1
        self.final = False
        self.arcs = {}          # key: arc value; value: a node
        self.addr = 0           # address in the binary dictionary
        self.pos = 0            # position in the binary dictionary (version 2)
        self.size = 0           # size of node in bytes (version 3)
    @classmethod
    def resetNextId (cls):
        "set NextId to 0 "
        cls.NextId = 0
    def setPos (self): # version 2
        "define a position for node (version 2)"
        self.pos = DawgNode.NextPos
        DawgNode.NextPos += 1
    def __str__ (self):
        # Caution! this function is used for hashing and comparison!
        sFinalChar = "1"  if self.final  else "0"
        l = [sFinalChar]
        for (key, node) in self.arcs.items():
            l.append(str(key))
            l.append(str(node.i))
        return "_".join(l)
    def __hash__ (self):
        # Used as a key in a python dictionary.
        return self.__str__().__hash__()
    def __eq__ (self, other):
        # Used as a key in a python dictionary.
        # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
        return self.__str__() == other.__str__()
    def sortArcs (self, dValOccur):
        "sort arcs of node according to <dValOccur>"
        self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(t[0], 0), reverse=True))
    def sortArcs2 (self, dValOccur, lArcVal):
        "sort arcs of each node depending on the previous char"
        self.arcs = collections.OrderedDict(sorted(self.arcs.items(), key=lambda t: dValOccur.get(lArcVal[t[0]], 0), reverse=True))
    # VERSION 1 =====================================================================================================
    def convToBytes1 (self, nBytesArc, nBytesNodeAddress):
        """
        Convert to bytes (method 1).
        Node scheme:
        - Arc length is defined by nBytesArc
        - Address length is defined by nBytesNodeAddress
        |                Arc                |                         Address of next node                          |
        |                                   |                                                                       |
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
         [...]
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
          ^ ^
          ┃ ┃
          ┃ ┃
          ┃ ┗━━━ if 1, last arc of this node
          ┗━━━━━ if 1, this node is final (only on the first arc)
        """
        nArc = len(self.arcs)
        nFinalNodeMask = 1 << ((nBytesArc*8)-1)
        nFinalArcMask = 1 << ((nBytesArc*8)-2)
        if len(self.arcs) == 0:
            val = nFinalNodeMask | nFinalArcMask
            by = val.to_bytes(nBytesArc, byteorder='big')
            by += (0).to_bytes(nBytesNodeAddress, byteorder='big')
            return by
        by = b""
        for i, arc in enumerate(self.arcs, 1):
            val = arc
            if i == 1 and self.final:
                val = val | nFinalNodeMask
            if i == nArc:
                val = val | nFinalArcMask
            by += val.to_bytes(nBytesArc, byteorder='big')
            by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big')
        return by
    def getTxtRepr1 (self, nBytesArc, lVal):
        "return representation as string of node (method 1)"
        nArc = len(self.arcs)
        nFinalNodeMask = 1 << ((nBytesArc*8)-1)
        nFinalArcMask = 1 << ((nBytesArc*8)-2)
        s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr)
        if len(self.arcs) == 0:
            s += "  {:<20}  {:0>16}  i{:_>10}   #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0")
            return s
        for i, arc in enumerate(self.arcs, 1):
            val = arc
            if i == 1 and self.final:
                val = val | nFinalNodeMask
            if i == nArc:
                val = val | nFinalArcMask
            s += "  {:<20}  {:0>16}  i{:_>10}   #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr)
        return s
    # VERSION 2 =====================================================================================================
    def convToBytes2 (self, nBytesArc, nBytesNodeAddress):
        """
        Convert to bytes (method 2).
        Node scheme:
        - Arc length is defined by nBytesArc
        - Address length is defined by nBytesNodeAddress
        |                Arc                |                         Address of next node                          |
        |                                   |                                                                       |
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
         [...]
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
          ^ ^ ^
          ┃ ┃ ┃
          ┃ ┃ ┗━━ if 1, caution, no address: next node is the following node
          ┃ ┗━━━━ if 1, last arc of this node
          ┗━━━━━━ if 1, this node is final (only on the first arc)
        """
        nArc = len(self.arcs)
        nFinalNodeMask = 1 << ((nBytesArc*8)-1)
        nFinalArcMask = 1 << ((nBytesArc*8)-2)
        nNextNodeMask = 1 << ((nBytesArc*8)-3)
        if len(self.arcs) == 0:
            val = nFinalNodeMask | nFinalArcMask
 | 
| ︙ | ︙ | |||
| 682 683 684 685 686 687 688 | 
            if (self.pos + 1) == self.arcs[arc].pos and self.i != 0:
                val = val | nNextNodeMask
                by += val.to_bytes(nBytesArc, byteorder='big')
            else:
                by += val.to_bytes(nBytesArc, byteorder='big')
                by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big')
        return by
 | | | > | > > | | | | | | | | | | | | | | | 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 | 
            if (self.pos + 1) == self.arcs[arc].pos and self.i != 0:
                val = val | nNextNodeMask
                by += val.to_bytes(nBytesArc, byteorder='big')
            else:
                by += val.to_bytes(nBytesArc, byteorder='big')
                by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big')
        return by
    def getTxtRepr2 (self, nBytesArc, lVal):
        "return representation as string of node (method 2)"
        nArc = len(self.arcs)
        nFinalNodeMask = 1 << ((nBytesArc*8)-1)
        nFinalArcMask = 1 << ((nBytesArc*8)-2)
        nNextNodeMask = 1 << ((nBytesArc*8)-3)
        s = "i{:_>10} -- #{:_>10}\n".format(self.i, self.addr)
        if nArc == 0:
            s += "  {:<20}  {:0>16}  i{:_>10}   #{:_>10}\n".format("", bin(nFinalNodeMask | nFinalArcMask)[2:], "0", "0")
            return s
        for i, arc in enumerate(self.arcs, 1):
            val = arc
            if i == 1 and self.final:
                val = val | nFinalNodeMask
            if i == nArc:
                val = val | nFinalArcMask
            if (self.pos + 1) == self.arcs[arc].pos  and self.i != 0:
                val = val | nNextNodeMask
                s += "  {:<20}  {:0>16}\n".format(lVal[arc], bin(val)[2:])
            else:
                s += "  {:<20}  {:0>16}  i{:_>10}   #{:_>10}\n".format(lVal[arc], bin(val)[2:], self.arcs[arc].i, self.arcs[arc].addr)
        return s
    # VERSION 3 =====================================================================================================
    def convToBytes3 (self, nBytesArc, nBytesNodeAddress, nBytesOffset):
        """
        Convert to bytes (method 3).
        Node scheme:
        - Arc length is defined by nBytesArc
        - Address length is defined by nBytesNodeAddress
        - Offset length is defined by nBytesOffset
        |                Arc                |            Address of next node  or  offset to next node              |
        |                                   |                                                                       |
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃1┃0┃0┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
         [...]
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃0┃0┃1┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃     Offsets are shorter than addresses
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
         ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━┓
         ┃0┃1┃0┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃ ┃
         ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━━━━┛
          ^ ^ ^
          ┃ ┃ ┃
          ┃ ┃ ┗━━ if 1, offset instead of address of next node
          ┃ ┗━━━━ if 1, last arc of this node
          ┗━━━━━━ if 1, this node is final (only on the first arc)
        """
        nArc = len(self.arcs)
        nFinalNodeMask = 1 << ((nBytesArc*8)-1)
        nFinalArcMask = 1 << ((nBytesArc*8)-2)
        nNextNodeMask = 1 << ((nBytesArc*8)-3)
        nMaxOffset = (2 ** (nBytesOffset * 8)) - 1
        if nArc == 0:
 | 
| ︙ | ︙ | |||
| 757 758 759 760 761 762 763 | 
                val = val | nNextNodeMask
                by += val.to_bytes(nBytesArc, byteorder='big')
                by += (self.arcs[arc].addr-self.addr).to_bytes(nBytesOffset, byteorder='big')
            else:
                by += val.to_bytes(nBytesArc, byteorder='big')
                by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big')
        return by
 | | | > | 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 | 
                val = val | nNextNodeMask
                by += val.to_bytes(nBytesArc, byteorder='big')
                by += (self.arcs[arc].addr-self.addr).to_bytes(nBytesOffset, byteorder='big')
            else:
                by += val.to_bytes(nBytesArc, byteorder='big')
                by += self.arcs[arc].addr.to_bytes(nBytesNodeAddress, byteorder='big')
        return by
    def getTxtRepr3 (self, nBytesArc, nBytesOffset, lVal):
        "return representation as string of node (method 3)"
        nArc = len(self.arcs)
        nFinalNodeMask = 1 << ((nBytesArc*8)-1)
        nFinalArcMask = 1 << ((nBytesArc*8)-2)
        nNextNodeMask = 1 << ((nBytesArc*8)-3)
        nMaxOffset = (2 ** (nBytesOffset * 8)) - 1
        s = "i{:_>10} -- #{:_>10}  ({})\n".format(self.i, self.addr, self.size)
        if nArc == 0:
 | 
| ︙ | ︙ | |||
| 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 | 
_dCharOrder = {
    # key: previous char, value: dictionary of chars {c: nValue}
    "": {}
}
def addWordToCharDict (sWord):
    cPrevious = ""
    for cChar in sWord:
        if cPrevious not in _dCharOrder:
            _dCharOrder[cPrevious] = {}
        _dCharOrder[cPrevious][cChar] = _dCharOrder[cPrevious].get(cChar, 0) + 1
        cPrevious = cChar
def getCharOrderAfterChar (cChar):
    return _dCharOrder.get(cChar, None)
def displayCharOrder ():
    for key, value in _dCharOrder.items():
        print("[" + key + "]: ", ", ".join([ c+":"+str(n)  for c, n  in  sorted(value.items(), key=lambda t: t[1], reverse=True) ]))
 | > > > | 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 | 
_dCharOrder = {
    # key: previous char, value: dictionary of chars {c: nValue}
    "": {}
}
def addWordToCharDict (sWord):
    "for each character of <sWord>, count how many times it appears after the previous character, and store result in a <_dCharOrder>"
    cPrevious = ""
    for cChar in sWord:
        if cPrevious not in _dCharOrder:
            _dCharOrder[cPrevious] = {}
        _dCharOrder[cPrevious][cChar] = _dCharOrder[cPrevious].get(cChar, 0) + 1
        cPrevious = cChar
def getCharOrderAfterChar (cChar):
    "return a dictionary of chars with number of times it appears after character <cChar>"
    return _dCharOrder.get(cChar, None)
def displayCharOrder ():
    "display how many times each character appear after another one"
    for key, value in _dCharOrder.items():
        print("[" + key + "]: ", ", ".join([ c+":"+str(n)  for c, n  in  sorted(value.items(), key=lambda t: t[1], reverse=True) ]))
 | 
Modified graphspell/echo.py from [6d11a5dda8] to [440b1511e9].
| 1 2 | #!python3 | > | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | 
#!python3
"""
The most boring yet indispensable function: print!
Because you can print on Windows console without being sure the script won’t crash…
Windows console don’t accept many characters.
"""
import sys
_CHARMAP = str.maketrans({  'œ': 'ö',  'Œ': 'Ö',  'ʳ': "r",  'ᵉ': "e",  '…': "_",  \
                            '“': '"',  '”': '"',  '„': '"',  '‘': "'",  '’': "'",  \
                            'ā': 'â',  'Ā': 'Â',  'ē': 'ê',  'Ē': 'Ê',  'ī': 'î',  'Ī': 'Î',  \
 | 
| ︙ | ︙ | |||
| 20 21 22 23 24 25 26 | 
        Encoding depends on Windows locale. No useful standard.
        Always returns True (useful for debugging)."""
    if sys.platform != "win32":
        print(obj, sep=sep, end=end, file=file, flush=flush)
        return True
    try:
        print(str(obj).translate(_CHARMAP), sep=sep, end=end, file=file, flush=flush)
 | | | 24 25 26 27 28 29 30 31 32 33 | 
        Encoding depends on Windows locale. No useful standard.
        Always returns True (useful for debugging)."""
    if sys.platform != "win32":
        print(obj, sep=sep, end=end, file=file, flush=flush)
        return True
    try:
        print(str(obj).translate(_CHARMAP), sep=sep, end=end, file=file, flush=flush)
    except Exception:
        print(str(obj).encode('ascii', 'replace').decode('ascii', 'replace'), sep=sep, end=end, file=file, flush=flush)
    return True
 | 
Added graphspell/fr.py version [963bf7ea5b].
| > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | 
"""
Default suggestion for French language
"""
dSugg = {
    "bcp": "beaucoup",
    "ca": "ça",
    "cad": "c’est-à-dire",
    "cb": "combien|CB",
    "cdlt": "cordialement",
    "construirent": "construire|construisirent|construisent|construiront",
    "càd": "c’est-à-dire",
    "dc": "de|donc",
    "email": "courriel|e-mail|émail",
    "emails": "courriels|e-mails",
    "Etes-vous": "Êtes-vous",
    "Etiez-vous": "Étiez-vous",
    "Etions-nous": "Étions-nous",
    "parce-que": "parce que",
    "pcq": "parce que",
    "pd": "pendant",
    "pdq": "pendant que",
    "pdt": "pendant",
    "pdtq": "pendant que",
    "pk": "pourquoi",
    "pq": "pourquoi|PQ",
    "prq": "presque",
    "prsq": "presque",
    "qcq": "quiconque",
    "qq": "quelque",
    "qqch": "quelque chose",
    "qqn": "quelqu’un",
    "qqne": "quelqu’une",
    "qqs": "quelques",
    "qqunes": "quelques-unes",
    "qquns": "quelques-uns",
    "tdq": "tandis que",
    "tj": "toujours",
    "tjs": "toujours",
    "tq": "tant que|tandis que",
    "ts": "tous",
    "tt": "tant|tout",
    "tte": "toute",
    "ttes": "toutes",
    "y’a": "y a"
}
 | 
Modified graphspell/ibdawg.py from [a255097656] to [0f1b5456be].
| 1 2 | #!python3 | > > > > > | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | 
#!python3
"""
INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH
Implementation of a spellchecker as a transducer (storing transformation code to get lemma and morphologies)
and a spell suggestion mechanim
"""
import traceback
import pkgutil
import re
from functools import wraps
import time
import json
import binascii
#import logging
#logging.basicConfig(filename="suggestions.log", level=logging.DEBUG)
from . import str_transform as st
from . import char_player as cp
from .echo import echo
def timethis (func):
    "decorator for the execution time"
    @wraps(func)
    def wrapper (*args, **kwargs):
        "something to prevent pylint whining"
        fStart = time.time()
        result = func(*args, **kwargs)
        fEnd = time.time()
        print(func.__name__, fEnd - fStart)
        return result
    return wrapper
 | 
| ︙ | ︙ | |||
| 54 55 56 57 58 59 60 | 
                    self.dSugg[nDist] = []
                self.dSugg[nDist].append(sSugg)
                self.aSugg.add(sSugg)
                if nDist < self.nMinDist:
                    self.nMinDist = nDist
                self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)
 | | > | 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | 
                    self.dSugg[nDist] = []
                self.dSugg[nDist].append(sSugg)
                self.aSugg.add(sSugg)
                if nDist < self.nMinDist:
                    self.nMinDist = nDist
                self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)
    def getSuggestions (self, nSuggLimit=10):
        "return a list of suggestions"
        if self.dSugg[0]:
            # we sort the better results with the original word
            self.dSugg[0].sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
        lRes = self.dSugg.pop(0)
        for nDist, lSugg in self.dSugg.items():
            if nDist <= self.nDistLimit:
                lRes.extend(lSugg)
                if len(lRes) > nSuggLimit:
                    break
        lRes = list(cp.filterSugg(lRes))
        if self.sWord.isupper():
            lRes = list(map(lambda sSugg: sSugg.upper(), lRes))
        elif self.sWord[0:1].isupper():
            lRes = list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))  # dont’ use <.istitle>
        return lRes[:nSuggLimit]
    def reset (self):
        "clear data"
        self.aSugg.clear()
        self.dSugg.clear()
class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""
 | 
| ︙ | ︙ | |||
| 145 146 147 148 149 150 151 | 
            raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
        if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
            raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
        try:
            header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
        except Exception:
            raise Exception
 | | | 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | 
            raise TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: {}".format(self.by[0:9]))
        if not(self.by[17:18] == b"1" or self.by[17:18] == b"2" or self.by[17:18] == b"3"):
            raise ValueError("# Error. Unknown dictionary version: {}".format(self.by[17:18]))
        try:
            header, info, values, bdic = self.by.split(b"\0\0\0\0", 3)
        except Exception:
            raise Exception
        self.nCompressionMethod = int(self.by[17:18].decode("utf-8"))
        self.sHeader = header.decode("utf-8")
        self.lArcVal = values.decode("utf-8").split("\t")
        self.nArcVal = len(self.lArcVal)
        self.byDic = bdic
        l = info.decode("utf-8").split("//")
 | 
| ︙ | ︙ | |||
| 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | 
    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
    def getInfo (self):
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)
    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
        "write IBDAWG as a JavaScript object in a JavaScript module"
        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write(json.dumps({
 | > | | | | | | | | | | | | | | | | | | | | | | | | | | 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | 
    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)
    def writeAsJSObject (self, spfDest, bInJSModule=False, bBinaryDictAsHexString=False):
        "write IBDAWG as a JavaScript object in a JavaScript module"
        with open(spfDest, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write(json.dumps({
                "sHeader": "/grammalecte-fsa/",
                "sLangCode": self.sLangCode,
                "sLangName": self.sLangName,
                "sDicName": self.sDicName,
                "sFileName": self.sFileName,
                "sDate": self.sDate,
                "nEntry": self.nEntry,
                "nChar": self.nChar,
                "nAff": self.nAff,
                "nTag": self.nTag,
                "cStemming": self.cStemming,
                "dChar": self.dChar,
                "nNode": self.nNode,
                "nArc": self.nArc,
                "nArcVal": self.nArcVal,
                "lArcVal": self.lArcVal,
                "nCompressionMethod": self.nCompressionMethod,
                "nBytesArc": self.nBytesArc,
                "nBytesNodeAddress": self.nBytesNodeAddress,
                "nBytesOffset": self.nBytesOffset,
                # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                # https://github.com/mozilla/addons-linter/issues/1361
                "sByDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ]
            }, ensure_ascii=False))
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")
    def isValidToken (self, sToken):
        "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
        sToken = cp.spellingNormalization(sToken)
        if self.isValid(sToken):
 | 
| ︙ | ︙ | |||
| 263 264 265 266 267 268 269 | 
    def lookup (self, sWord):
        "returns True if <sWord> in dictionary (strict verification)"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
 | | | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | 
    def lookup (self, sWord):
        "returns True if <sWord> in dictionary (strict verification)"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return False
        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        sWord = cp.spellingNormalization(sWord)
        l = self.morph(sWord)
 | 
| ︙ | ︙ | |||
| 342 343 344 345 346 347 348 | 
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True)
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True)
    #@timethis
 | | | | 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 | 
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True)
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump, nDist, nDeep+1, iAddr, sNewWord, True)
    #@timethis
    def suggest2 (self, sWord, nSuggLimit=10):
        "returns a set of suggestions for <sWord>"
        sWord = cp.spellingNormalization(sWord)
        sPfx, sWord, sSfx = cp.cut(sWord)
        oSuggResult = SuggResult(sWord)
        self._suggest2(oSuggResult)
        aSugg = oSuggResult.getSuggestions(nSuggLimit)
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg
    def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
        # recursive function
 | 
| ︙ | ︙ | |||
| 405 406 407 408 409 410 411 | 
    def drawPath (self, sWord, iAddr=0):
        "show the path taken by <sWord> in the graph"
        sWord = cp.spellingNormalization(sWord)
        c1 = sWord[0:1]  if sWord  else " "
        iPos = -1
        n = 0
 | | | | | 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | 
    def drawPath (self, sWord, iAddr=0):
        "show the path taken by <sWord> in the graph"
        sWord = cp.spellingNormalization(sWord)
        c1 = sWord[0:1]  if sWord  else " "
        iPos = -1
        n = 0
        echo(c1 + ": ", end="")
        for c2, jAddr in self._getCharArcs(iAddr):
            echo(c2, end="")
            if c2 == sWord[0:1]:
                iNextNodeAddr = jAddr
                iPos = n
            n += 1
        if not sWord:
            return
        if iPos >= 0:
            echo("\n   " + " " * iPos + "|")
            self.drawPath(sWord[1:], iNextNodeAddr)
    def getSimilarEntries (self, sWord, nSuggLimit=10):
        "return a list of tuples (similar word, stem, morphology)"
        if not sWord:
            return []
        lResult = []
 | 
| ︙ | ︙ | |||
| 467 468 469 470 471 472 473 | 
    def _morph1 (self, sWord):
        "returns morphologies of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
 | | | | | | | | | | | | | | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 | 
    def _morph1 (self, sWord):
        "returns morphologies of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return []
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            l = []
            nRawArc = 0
            while not (nRawArc & self._lastArcMask):
                iEndArcAddr = iAddr + self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if nArc > self.nChar:
                    # This value is not a char, this is a stemming code
                    sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
                    # Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    nRawArc2 = 0
                    while not (nRawArc2 & self._lastArcMask):
                        iEndArcAddr2 = iAddr2 + self.nBytesArc
                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
                iAddr = iEndArcAddr+self.nBytesNodeAddress
            return l
        return []
    def _stem1 (self, sWord):
        "returns stems list of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return []
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            l = []
            nRawArc = 0
            while not (nRawArc & self._lastArcMask):
                iEndArcAddr = iAddr + self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if nArc > self.nChar:
                    # This value is not a char, this is a stemming code
                    l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
                iAddr = iEndArcAddr+self.nBytesNodeAddress
            return l
        return []
    def _lookupArcNode1 (self, nVal, iAddr):
        "looks if <nVal> is an arc at the node at <iAddr>, if yes, returns address of next node else None"
        while True:
            iEndArcAddr = iAddr+self.nBytesArc
            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
            if nVal == (nRawArc & self._arcMask):
                # the value we are looking for
                # we return the address of the next node
                return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
            else:
                # value not found
                if nRawArc & self._lastArcMask:
                    return None
                iAddr = iEndArcAddr+self.nBytesNodeAddress
    def _getArcs1 (self, iAddr):
        "generator: return all arcs at <iAddr> as tuples of (nVal, iAddr)"
        while True:
            iEndArcAddr = iAddr+self.nBytesArc
            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
            yield nRawArc & self._arcMask, int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
            if nRawArc & self._lastArcMask:
                break
            iAddr = iEndArcAddr+self.nBytesNodeAddress
    def _writeNodes1 (self, spfDest):
        "for debugging only"
        print(" > Write binary nodes")
        with open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
            iAddr = 0
            hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
            while iAddr < len(self.byDic):
                iEndArcAddr = iAddr+self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                hDst.write("  {:<20}  {:0>16}  i{:>10}   #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", \
 | 
| ︙ | ︙ | |||
| 565 566 567 568 569 570 571 | 
    def _morph2 (self, sWord):
        "returns morphologies of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
 | | | | | | | | | | | | | | | | | | | | | | | | | 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 | 
    def _morph2 (self, sWord):
        "returns morphologies of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return []
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            l = []
            nRawArc = 0
            while not (nRawArc & self._lastArcMask):
                iEndArcAddr = iAddr + self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if nArc > self.nChar:
                    # This value is not a char, this is a stemming code
                    sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
                    # Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    if not (nRawArc & self._addrBitMask):
                        iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    else:
                        # we go to the end of the node
                        iAddr2 = iEndArcAddr
                        while not (nRawArc & self._lastArcMask):
                            nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
                            iAddr2 += self.nBytesArc + self.nBytesNodeAddress
                    nRawArc2 = 0
                    while not (nRawArc2 & self._lastArcMask):
                        iEndArcAddr2 = iAddr2 + self.nBytesArc
                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress  if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr
            return l
        return []
    def _stem2 (self, sWord):
        "returns stems list of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return []
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            l = []
            nRawArc = 0
            while not (nRawArc & self._lastArcMask):
                iEndArcAddr = iAddr + self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if nArc > self.nChar:
                    # This value is not a char, this is a stemming code
                    l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
                    # Now , we go to the next node
                    if not (nRawArc & self._addrBitMask):
                        iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    else:
                        # we go to the end of the node
                        iAddr2 = iEndArcAddr
                        while not (nRawArc & self._lastArcMask):
                            nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
                            iAddr2 += self.nBytesArc + self.nBytesNodeAddress
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr
            return l
        return []
    def _lookupArcNode2 (self, nVal, iAddr):
        "looks if <nVal> is an arc at the node at <iAddr>, if yes, returns address of next node else None"
        while True:
            iEndArcAddr = iAddr+self.nBytesArc
            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
            if nVal == (nRawArc & self._arcMask):
                # the value we are looking for
                if not (nRawArc & self._addrBitMask):
                    # we return the address of the next node
                    return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                else:
                    # we go to the end of the node
                    iAddr = iEndArcAddr
                    while not (nRawArc & self._lastArcMask):
                        nRawArc = int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big')
                        iAddr += self.nBytesArc + self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else self.nBytesArc
                    return iAddr
            else:
                # value not found
                if nRawArc & self._lastArcMask:
                    return None
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr
    def _writeNodes2 (self, spfDest):
        "for debugging only"
        print(" > Write binary nodes")
        with open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
            iAddr = 0
            hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
            while iAddr < len(self.byDic):
                iEndArcAddr = iAddr+self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if not (nRawArc & self._addrBitMask):
                    iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    hDst.write("  {:<20}  {:0>16}  i{:>10}   #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr))
                    iAddr = iEndArcAddr+self.nBytesNodeAddress
                else:
                    hDst.write("  {:<20}  {:0>16}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:]))
                    iAddr = iEndArcAddr
                if nRawArc & self._lastArcMask:
                    hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr))
            hDst.close()
    # VERSION 3
    def _morph3 (self, sWord):
        "returns morphologies of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return []
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            l = []
            nRawArc = 0
            iAddrNode = iAddr
            while not (nRawArc & self._lastArcMask):
                iEndArcAddr = iAddr + self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if nArc > self.nChar:
                    # This value is not a char, this is a stemming code
                    sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
                    # Now , we go to the next node and retrieve all following arcs values, all of them are tags
                    if not (nRawArc & self._addrBitMask):
                        iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    else:
                        iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
                    nRawArc2 = 0
                    while not (nRawArc2 & self._lastArcMask):
                        iEndArcAddr2 = iAddr2 + self.nBytesArc
                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress  if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr+self.nBytesOffset
            return l
        return []
    def _stem3 (self, sWord):
        "returns stems list of <sWord>"
        iAddr = 0
        for c in sWord:
            if c not in self.dChar:
                return []
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr is None:
                return []
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            l = []
            nRawArc = 0
            #iAddrNode = iAddr
            while not (nRawArc & self._lastArcMask):
                iEndArcAddr = iAddr + self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if nArc > self.nChar:
                    # This value is not a char, this is a stemming code
                    l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr+self.nBytesOffset
            return l
        return []
    def _lookupArcNode3 (self, nVal, iAddr):
        "looks if <nVal> is an arc at the node at <iAddr>, if yes, returns address of next node else None"
        iAddrNode = iAddr
        while True:
            iEndArcAddr = iAddr+self.nBytesArc
            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
            if nVal == (nRawArc & self._arcMask):
                # the value we are looking for
                if not (nRawArc & self._addrBitMask):
                    return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                else:
                    return iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
            else:
                # value not found
                if nRawArc & self._lastArcMask:
                    return None
                iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr+self.nBytesOffset
    def _writeNodes3 (self, spfDest):
        "for debugging only"
        print(" > Write binary nodes")
        with open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
            iAddr = 0
            hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
            while iAddr < len(self.byDic):
                iEndArcAddr = iAddr+self.nBytesArc
                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
                nArc = nRawArc & self._arcMask
                if not (nRawArc & self._addrBitMask):
                    iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
                    hDst.write("  {:<20}  {:0>16}  i{:>10}   #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr))
                    iAddr = iEndArcAddr+self.nBytesNodeAddress
                else:
                    iNextNodeAddr = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
                    hDst.write("  {:<20}  {:0>16}  i{:>10}   +{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", iNextNodeAddr))
                    iAddr = iEndArcAddr+self.nBytesOffset
                if nRawArc & self._lastArcMask:
                    hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr))
            hDst.close()
 | 
Modified graphspell/keyboard_chars_proximity.py from [8f397a7bbf] to [f71f3b18e4].
| 
 | 
 | > > | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | 
"""
Keyboard chars proximity
"""
def getKeyboardMap (sKeyboard):
    "return keyboard map as a dictionary of chars"
    return _dKeyboardMap.get(sKeyboard.lower(), {})
def getKeyboardList ():
    "return list of keyboards available"
    return _dKeyboardMap.keys()
_dKeyboardMap = {
    # keyboards by alphabetical order
    # bépo, colemak and dvorak users are assumed to do less typing errors.
    "azerty": {
 | 
| ︙ | ︙ | 
Modified graphspell/progressbar.py from [5def72a6ce] to [b21d9bfaa8].
| 
 | 
 | > | > > | | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | 
"""
Textual progressbar
"""
# by Olivier R.
# License: MPL 2
import time
class ProgressBar:
    "Textual progressbar"
    def __init__ (self, nMin=0, nMax=100, nWidth=78):
        "initiate with minimum nMin to maximum nMax"
        self.nMin = nMin
        self.nMax = nMax
        self.nSpan = nMax - nMin
        self.nWidth = nWidth-9
        self.nAdvance = -1
        self.nCurVal = nMin
        self.startTime = time.time()
        self._update()
    def _update (self):
        fDone = (self.nCurVal - self.nMin) / self.nSpan
        nAdvance = int(fDone * self.nWidth)
        if nAdvance > self.nAdvance:
            self.nAdvance = nAdvance
            print("\r[ {}{}  {}% ] ".format('>'*nAdvance, ' '*(self.nWidth-nAdvance), round(fDone*100)), end="")
    def increment (self, n=1):
        "increment value by n (1 by default)"
        self.nCurVal += n
        self._update()
    def done (self):
        "to call when it’s finished"
        print("\r[ task done in {:.1f} s ] ".format(time.time() - self.startTime))
 | 
Modified graphspell/spellchecker.py from [cbd22d2c4d] to [85bf9023fe].
| 
 | 
 | > | < | | | | | | | > > > > > > > > > | > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | 
"""
Spellchecker.
Useful to check several dictionaries at once.
To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package
- the extended dictionary
- the community dictionary, added by an organization
- the personal dictionary, created by the user for its own convenience
"""
import importlib
import traceback
from . import ibdawg
from . import tokenizer
dDefaultDictionaries = {
    "fr": "fr-allvars.bdic",
    "en": "en.bdic"
}
class SpellChecker ():
    "SpellChecker: wrapper for the IBDAWG class"
    def __init__ (self, sLangCode, sfMainDic="", sfExtendedDic="", sfCommunityDic="", sfPersonalDic=""):
        "returns True if the main dictionary is loaded"
        self.sLangCode = sLangCode
        if not sfMainDic:
            sfMainDic = dDefaultDictionaries.get(sLangCode, "")
        self.oMainDic = self._loadDictionary(sfMainDic, True)
        self.oExtendedDic = self._loadDictionary(sfExtendedDic)
        self.oCommunityDic = self._loadDictionary(sfCommunityDic)
        self.oPersonalDic = self._loadDictionary(sfPersonalDic)
        self.bExtendedDic = bool(self.oExtendedDic)
        self.bCommunityDic = bool(self.oCommunityDic)
        self.bPersonalDic = bool(self.oPersonalDic)
        self.oTokenizer = None
        # Default suggestions
        self.dDefaultSugg = None
        self.loadSuggestions(sLangCode)
        # storage
        self.bStorage = False
        self._dMorphologies = {}        # key: flexion, value: list of morphologies
        self._dLemmas = {}              # key: flexion, value: list of lemmas
    def _loadDictionary (self, source, bNecessary=False):
        "returns an IBDAWG object"
        if not source:
            return None
        try:
            return ibdawg.IBDAWG(source)
        except Exception as e:
            if bNecessary:
                raise Exception(str(e), "Error: <" + str(source) + "> not loaded.")
            print("Error: <" + str(source) + "> not loaded.")
            traceback.print_exc()
            return None
    def _loadTokenizer (self):
        self.oTokenizer = tokenizer.Tokenizer(self.sLangCode)
    def getTokenizer (self):
        "load and return the tokenizer object"
        if not self.oTokenizer:
            self._loadTokenizer()
        return self.oTokenizer
    def setMainDictionary (self, source):
        "returns True if the dictionary is loaded"
        self.oMainDic = self._loadDictionary(source, True)
        return bool(self.oMainDic)
    def setExtendedDictionary (self, source, bActivate=True):
        "returns True if the dictionary is loaded"
        self.oExtendedDic = self._loadDictionary(source)
        self.bExtendedDic = False  if not bActivate  else bool(self.oExtendedDic)
        return bool(self.oExtendedDic)
    def setCommunityDictionary (self, source, bActivate=True):
        "returns True if the dictionary is loaded"
        self.oCommunityDic = self._loadDictionary(source)
        self.bCommunityDic = False  if not bActivate  else bool(self.oCommunityDic)
        return bool(self.oCommunityDic)
    def setPersonalDictionary (self, source, bActivate=True):
        "returns True if the dictionary is loaded"
        self.oPersonalDic = self._loadDictionary(source)
        self.bPersonalDic = False  if not bActivate  else bool(self.oPersonalDic)
        return bool(self.oPersonalDic)
    def activateExtendedDictionary (self):
        "activate extended dictionary (if available)"
        self.bExtendedDic = bool(self.oExtendedDic)
    def activateCommunityDictionary (self):
        "activate community dictionary (if available)"
        self.bCommunityDic = bool(self.oCommunityDic)
    def activatePersonalDictionary (self):
        "activate personal dictionary (if available)"
        self.bPersonalDic = bool(self.oPersonalDic)
    def deactivateExtendedDictionary (self):
        "deactivate extended dictionary"
        self.bExtendedDic = False
    def deactivateCommunityDictionary (self):
        "deactivate community dictionary"
        self.bCommunityDic = False
    def deactivatePersonalDictionary (self):
        "deactivate personal dictionary"
        self.bPersonalDic = False
    # Default suggestions
    def loadSuggestions (self, sLangCode):
        "load default suggestion module for <sLangCode>"
        try:
            suggest = importlib.import_module("."+sLangCode, "graphspell")
        except ImportError:
            print("No suggestion module for language <"+sLangCode+">")
            return
        self.dDefaultSugg = suggest.dSugg
    # Storage
    def activateStorage (self):
        "store all lemmas and morphologies retrieved from the word graph"
        self.bStorage = True
    def deactivateStorage (self):
        "stop storing all lemmas and morphologies retrieved from the word graph"
        self.bStorage = False
    def clearStorage (self):
        "clear all stored data"
        self._dLemmas.clear()
        self._dMorphologies.clear()
    # parse text functions
    def parseParagraph (self, sText, bSpellSugg=False):
        "return a list of tokens where token value doesn’t exist in the word graph"
        if not self.oTokenizer:
            self._loadTokenizer()
        aSpellErrs = []
        for dToken in self.oTokenizer.genTokens(sText):
            if dToken['sType'] == "WORD" and not self.isValidToken(dToken['sValue']):
                if bSpellSugg:
                    dToken['aSuggestions'] = []
                    for lSugg in self.suggest(dToken['sValue']):
                        dToken['aSuggestions'].extend(lSugg)
                aSpellErrs.append(dToken)
        return aSpellErrs
    def countWordsOccurrences (self, sText, bByLemma=False, bOnlyUnknownWords=False, dWord={}):
        """count word occurrences.
           <dWord> can be used to cumulate count from several texts."""
        if not self.oTokenizer:
            self._loadTokenizer()
        for dToken in self.oTokenizer.genTokens(sText):
            if dToken['sType'] == "WORD":
                if bOnlyUnknownWords:
                    if not self.isValidToken(dToken['sValue']):
                        dWord[dToken['sValue']] = dWord.get(dToken['sValue'], 0) + 1
                else:
                    if not bByLemma:
 | 
| ︙ | ︙ | |||
| 147 148 149 150 151 152 153 | 
    def isValid (self, sWord):
        "checks if sWord is valid (different casing tested if the first letter is a capital)"
        if self.oMainDic.isValid(sWord):
            return True
        if self.bExtendedDic and self.oExtendedDic.isValid(sWord):
            return True
 | | | > > | | | | > > > | > > > > > | > > > > > > > | > > > | 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | 
    def isValid (self, sWord):
        "checks if sWord is valid (different casing tested if the first letter is a capital)"
        if self.oMainDic.isValid(sWord):
            return True
        if self.bExtendedDic and self.oExtendedDic.isValid(sWord):
            return True
        if self.bCommunityDic and self.oCommunityDic.isValid(sWord):
            return True
        if self.bPersonalDic and self.oPersonalDic.isValid(sWord):
            return True
        return False
    def lookup (self, sWord):
        "checks if sWord is in dictionary as is (strict verification)"
        if self.oMainDic.lookup(sWord):
            return True
        if self.bExtendedDic and self.oExtendedDic.lookup(sWord):
            return True
        if self.bCommunityDic and self.oCommunityDic.lookup(sWord):
            return True
        if self.bPersonalDic and self.oPersonalDic.lookup(sWord):
            return True
        return False
    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        if self.bStorage and sWord in self._dMorphologies:
            return self._dMorphologies[sWord]
        lMorph = self.oMainDic.getMorph(sWord)
        if self.bExtendedDic:
            lMorph.extend(self.oExtendedDic.getMorph(sWord))
        if self.bCommunityDic:
            lMorph.extend(self.oCommunityDic.getMorph(sWord))
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = set([ s[1:s.find("/")]  for s in lMorph ])
        return lMorph
    def getLemma (self, sWord):
        "retrieves lemmas"
        if self.bStorage:
            if sWord not in self._dLemmas:
                self.getMorph(sWord)
            return self._dLemmas[sWord]
        return set([ s[1:s.find("/")]  for s in self.getMorph(sWord) ])
    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        if self.dDefaultSugg:
            if sWord in self.dDefaultSugg:
                yield self.dDefaultSugg[sWord].split("|")
            elif sWord.istitle() and sWord.lower() in self.dDefaultSugg:
                lRes = self.dDefaultSugg[sWord.lower()].split("|")
                yield list(map(lambda sSugg: sSugg[0:1].upper()+sSugg[1:], lRes))
            else:
                yield self.oMainDic.suggest(sWord, nSuggLimit)
        else:
            yield self.oMainDic.suggest(sWord, nSuggLimit)
        if self.bExtendedDic:
            yield self.oExtendedDic.suggest(sWord, nSuggLimit)
        if self.bCommunityDic:
            yield self.oCommunityDic.suggest(sWord, nSuggLimit)
        if self.bPersonalDic:
            yield self.oPersonalDic.suggest(sWord, nSuggLimit)
    def select (self, sFlexPattern="", sTagsPattern=""):
        "generator: returns all entries which flexion fits <sFlexPattern> and morphology fits <sTagsPattern>"
        yield from self.oMainDic.select(sFlexPattern, sTagsPattern)
        if self.bExtendedDic:
            yield from self.oExtendedDic.select(sFlexPattern, sTagsPattern)
        if self.bCommunityDic:
            yield from self.oCommunityDic.select(sFlexPattern, sTagsPattern)
        if self.bPersonalDic:
            yield from self.oPersonalDic.select(sFlexPattern, sTagsPattern)
    def drawPath (self, sWord):
        "draw the path taken by <sWord> within the word graph: display matching nodes and their arcs"
        self.oMainDic.drawPath(sWord)
        if self.bExtendedDic:
            print("-----")
            self.oExtendedDic.drawPath(sWord)
        if self.bCommunityDic:
            print("-----")
            self.oCommunityDic.drawPath(sWord)
 | 
| ︙ | ︙ | 
Modified graphspell/str_transform.py from [9961c8cbc8] to [c5501f9a5a].
| 1 2 3 4 5 6 7 8 | 
#!python3
#### DISTANCE CALCULATIONS
def longestCommonSubstring (s1, s2):
    # http://en.wikipedia.org/wiki/Longest_common_substring_problem
    # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring
 | > > > > > > > | | | | | | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | 
#!python3
"""
Operations on strings:
- calculate distance between two strings
- transform strings with transformation codes
"""
#### DISTANCE CALCULATIONS
def longestCommonSubstring (s1, s2):
    "longest common substring"
    # http://en.wikipedia.org/wiki/Longest_common_substring_problem
    # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring
    lMatrix = [ [0]*(1+len(s2)) for i in range(1+len(s1)) ]
    nLongest, nLongestX = 0, 0
    for x in range(1, 1+len(s1)):
        for y in range(1, 1+len(s2)):
            if s1[x-1] == s2[y-1]:
                lMatrix[x][y] = lMatrix[x-1][y-1] + 1
                if lMatrix[x][y] > nLongest:
                    nLongest = lMatrix[x][y]
                    nLongestX = x
            else:
                lMatrix[x][y] = 0
    return s1[nLongestX-nLongest : nLongestX]
def distanceDamerauLevenshtein (s1, s2):
    "distance of Damerau-Levenshtein between <s1> and <s2>"
    # https://fr.wikipedia.org/wiki/Distance_de_Damerau-Levenshtein
    d = {}
    nLen1 = len(s1)
 | 
| ︙ | ︙ | |||
| 52 53 54 55 56 57 58 | 
        return len(s1)
    nLen1, nLen2 = len(s1), len(s2)
    i1, i2 = 0, 0   # Cursors for each string
    nLargestCS = 0  # Largest common substring
    nLocalCS = 0    # Local common substring
    nTrans = 0      # Number of transpositions ('ab' vs 'ba')
    lOffset = []    # Offset pair array, for computing the transpositions
 | | | 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | 
        return len(s1)
    nLen1, nLen2 = len(s1), len(s2)
    i1, i2 = 0, 0   # Cursors for each string
    nLargestCS = 0  # Largest common substring
    nLocalCS = 0    # Local common substring
    nTrans = 0      # Number of transpositions ('ab' vs 'ba')
    lOffset = []    # Offset pair array, for computing the transpositions
    while i1 < nLen1 and i2 < nLen2:
        if s1[i1] == s2[i2]:
            nLocalCS += 1
            # Check if current match is a transposition
            bTrans = False
            i = 0
            while i < len(lOffset):
 | 
| ︙ | ︙ | |||
| 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | 
            nLocalCS = 0
            i1 = i2 = min(i1, i2)
    nLargestCS += nLocalCS
    return round(max(nLen1, nLen2) - nLargestCS + nTrans)
def showDistance (s1, s2):
    print("Damerau-Levenshtein: " + s1 + "/" + s2 + " = " + distanceDamerauLevenshtein(s1, s2))
    print("Sift4:" + s1 + "/" + s2 + " = " + distanceSift4(s1, s2))
#### STEMMING OPERATIONS
## No stemming
def noStemming (sFlex, sStem):
    return sStem
 | > > | > > > | | | | | | | | | 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | 
            nLocalCS = 0
            i1 = i2 = min(i1, i2)
    nLargestCS += nLocalCS
    return round(max(nLen1, nLen2) - nLargestCS + nTrans)
def showDistance (s1, s2):
    "display Damerau-Levenshtein distance and Sift4 distance between <s1> and <s2>"
    print("Damerau-Levenshtein: " + s1 + "/" + s2 + " = " + distanceDamerauLevenshtein(s1, s2))
    print("Sift4:" + s1 + "/" + s2 + " = " + distanceSift4(s1, s2))
#### STEMMING OPERATIONS
## No stemming
def noStemming (sFlex, sStem):
    "return <sStem>"
    return sStem
def rebuildWord (sFlex, sCode1, sCode2):
    """ Change <sFlex> with codes (each inserts a char at a defined possition).
        <I forgot what purpose it has…>
    """
    if sCode1 == "_":
        return sFlex
    n, c = sCode1.split(":")
    sFlex = sFlex[:n] + c + sFlex[n:]
    if sCode2 == "_":
        return sFlex
    n, c = sCode2.split(":")
    return sFlex[:n] + c + sFlex[n:]
## Define affixes for stemming
# Note: 48 is the ASCII code for "0"
# Suffix only
def defineSuffixCode (sFlex, sStem):
 | 
| ︙ | ︙ | |||
| 148 149 150 151 152 153 154 | 
    if sFlex == sStem:
        return "0"
    jSfx = 0
    for i in range(min(len(sFlex), len(sStem))):
        if sFlex[i] != sStem[i]:
            break
        jSfx += 1
 | | > | | 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 | 
    if sFlex == sStem:
        return "0"
    jSfx = 0
    for i in range(min(len(sFlex), len(sStem))):
        if sFlex[i] != sStem[i]:
            break
        jSfx += 1
    return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]
def changeWordWithSuffixCode (sWord, sSfxCode):
    "apply transformation code <sSfxCode> on <sWord> and return the result string"
    if sSfxCode == "0":
        return sWord
    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]
# Prefix and suffix
def defineAffixCode (sFlex, sStem):
    """ Returns a string defining how to get stem from flexion. Examples:
            "0" if stem = flexion
            "stem" if no common substring
            "n(pfx)/m(sfx)"
        with n and m: chars with numeric meaning, "0" = 0, "1" = 1, ... ":" = 10, etc. (See ASCII table.) Says how many letters to strip from flexion.
            pfx [optional]: string to add before the flexion
            sfx [optional]: string to add after the flexion
    """
    if sFlex == sStem:
        return "0"
    # is stem a substring of flexion?
    n = sFlex.find(sStem)
    if n >= 0:
 | 
| ︙ | ︙ | |||
| 187 188 189 190 191 192 193 194 195 196 197 198 | 
        n = sFlex.find(sSubs)
        m = len(sFlex) - (len(sSubs)+n)
        return chr(n+48) + sPfx + "/" + chr(m+48) + sSfx
    return sStem
def changeWordWithAffixCode (sWord, sAffCode):
    if sAffCode == "0":
        return sWord
    if '/' not in sAffCode:
        return sAffCode
    sPfxCode, sSfxCode = sAffCode.split('/')
 | > | < | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | 
        n = sFlex.find(sSubs)
        m = len(sFlex) - (len(sSubs)+n)
        return chr(n+48) + sPfx + "/" + chr(m+48) + sSfx
    return sStem
def changeWordWithAffixCode (sWord, sAffCode):
    "apply transformation code <sAffCode> on <sWord> and return the result string"
    if sAffCode == "0":
        return sWord
    if '/' not in sAffCode:
        return sAffCode
    sPfxCode, sSfxCode = sAffCode.split('/')
    sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):]
    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]
 | 
Modified graphspell/tokenizer.py from [17f452887e] to [daca54adb9].
| 
 | 
 | > | > > | | | > | | | | | > > | > > > > | | > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | 
"""
Very simple tokenizer
using regular expressions
"""
import re
_PATTERNS = {
    "default":
        (
            r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
            r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
            r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
            r'(?P<HASHTAG>[#@][\w-]+)',
            r'(?P<HTML><\w+.*?>|</\w+ *>)',
            r'(?P<PSEUDOHTML>\[/?\w+\])',
            r'(?P<HOUR>\d\d?h\d\d\b)',
            r'(?P<NUM>\d+(?:[.,]\d+))',
            r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
        ),
    "fr":
        (
            r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
            r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
            r'(?P<WORD_ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
            r'(?P<HASHTAG>[#@][\w-]+)',
            r'(?P<HTML><\w+.*?>|</\w+ *>)',
            r'(?P<PSEUDOHTML>\[/?\w+\])',
            r"(?P<WORD_ELIDED>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
            r'(?P<WORD_ORDINAL>\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
            r'(?P<HOUR>\d\d?h\d\d\b)',
            r'(?P<NUM>\d+(?:[.,]\d+|))',
            r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
        )
}
class Tokenizer:
    "Tokenizer: transforms a text in a list of tokens"
    def __init__ (self, sLang):
        self.sLang = sLang
        if sLang not in _PATTERNS:
            self.sLang = "default"
        self.zToken = re.compile( "(?i)" + '|'.join(sRegex for sRegex in _PATTERNS[sLang]) )
    def genTokens (self, sText, bStartEndToken=False):
        "generator: tokenize <sText>"
        i = 0
        if bStartEndToken:
            yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] }
        for i, m in enumerate(self.zToken.finditer(sText), 1):
            yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
        if bStartEndToken:
            iEnd = len(sText)
            yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }
 | 
Modified make.py from [14e0172bf2] to [cf1490b4f0].
| 1 2 3 4 5 | #!/usr/bin/env python3 # coding: UTF-8 import sys import os | > > > > < | > | | | > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | 
 #!/usr/bin/env python3
# coding: UTF-8
"""
Grammalecte builder
"""
import sys
import os
import re
import zipfile
import traceback
import configparser
import datetime
import argparse
import importlib
import unittest
import json
import platform
from distutils import dir_util, file_util
#import dialog_bundled
import compile_rules
import helpers
import lex_build
sWarningMessage = "The content of this folder is generated by code and replaced at each build.\n"
def getConfig (sLang):
    "load config.ini in <sLang> at gc_lang/<sLang>, returns xConfigParser object"
    xConfig = configparser.ConfigParser()
    xConfig.optionxform = str
    try:
        xConfig.read_file(open("gc_lang/" + sLang + "/config.ini", "r", encoding="utf-8"))
    except FileNotFoundError:
        print("# Error. Can’t read config file [" + sLang + "]")
        exit()
    return xConfig
def createOptionsLabelProperties (dOptLbl):
    "create content for .properties files (LibreOffice)"
    sContent = ""
    for sOpt, tLabel in dOptLbl.items():
        sContent += sOpt + "=" + tLabel[0] + "\n"
        if tLabel[1]:
            sContent += "hlp_" + sOpt + "=" + tLabel[1] + "\n"
    return sContent
def createDialogOptionsXDL (dVars):
    "create bundled dialog options file .xdl (LibreOffice)"
    sFixedline = '<dlg:fixedline dlg:id="{0}" dlg:tab-index="{1}" dlg:top="{2}" dlg:left="5" dlg:width="{3}" dlg:height="10" dlg:value="&{0}" />\n'
    sCheckbox = '<dlg:checkbox dlg:id="{0}" dlg:tab-index="{1}" dlg:top="{2}" dlg:left="{3}" dlg:width="{4}" dlg:height="10" dlg:value="&{0}" dlg:checked="{5}" {6} />\n'
    iTabIndex = 1
    nPosY = 5
    nWidth = 240
    sContent = ""
    dOpt = dVars["dOptPython"]
 | 
| ︙ | ︙ | |||
| 131 132 133 134 135 136 137 | 
    # Installation in Writer profile
    if bInstall:
        print("> installation in Writer")
        if dVars.get('unopkg', False):
            cmd = '"'+os.path.abspath(dVars.get('unopkg')+'" add -f '+spfZip)
            print(cmd)
 | < > | 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | 
    # Installation in Writer profile
    if bInstall:
        print("> installation in Writer")
        if dVars.get('unopkg', False):
            cmd = '"'+os.path.abspath(dVars.get('unopkg')+'" add -f '+spfZip)
            print(cmd)
            os.system(cmd)
        else:
            print("# Error: path and filename of unopkg not set in config.ini")
def createServerOptions (sLang, dOptData):
    "create file options for Grammalecte server"
    with open("grammalecte-server-options."+sLang+".ini", "w", encoding="utf-8", newline="\n") as hDst:
        hDst.write("# Server options. Lang: " + sLang + "\n\n[gc_options]\n")
        for sSection, lOpt in dOptData["lStructOpt"]:
            hDst.write("\n########## " + dOptData["dOptLabel"][sLang].get(sSection, sSection + "[no label found]")[0] + " ##########\n")
            for lLineOpt in lOpt:
                for sOpt in lLineOpt:
                    hDst.write("# " + dOptData["dOptLabel"][sLang].get(sOpt, "[no label found]")[0] + "\n")
 | 
| ︙ | ︙ | |||
| 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 | 
                "grammalecte-server-options._global.ini", "grammalecte-server-options."+sLang+".ini", \
                "README.txt", "LICENSE.txt", "LICENSE.fr.txt"]:
        hZip.write(spf)
    hZip.writestr("setup.py", helpers.fileFile("gc_lang/fr/setup.py", dVars))
def copyGrammalectePyPackageInZipFile (hZip, spLangPack, sAddPath=""):
    for sf in os.listdir("grammalecte"):
        if not os.path.isdir("grammalecte/"+sf):
            hZip.write("grammalecte/"+sf, sAddPath+"grammalecte/"+sf)
    for sf in os.listdir("grammalecte/graphspell"):
        if not os.path.isdir("grammalecte/graphspell/"+sf):
            hZip.write("grammalecte/graphspell/"+sf, sAddPath+"grammalecte/graphspell/"+sf)
    for sf in os.listdir("grammalecte/graphspell/_dictionaries"):
        if not os.path.isdir("grammalecte/graphspell/_dictionaries/"+sf):
            hZip.write("grammalecte/graphspell/_dictionaries/"+sf, sAddPath+"grammalecte/graphspell/_dictionaries/"+sf)
    for sf in os.listdir(spLangPack):
        if not os.path.isdir(spLangPack+"/"+sf):
            hZip.write(spLangPack+"/"+sf, sAddPath+spLangPack+"/"+sf)
def create (sLang, xConfig, bInstallOXT, bJavaScript):
    oNow = datetime.datetime.now()
    print("============== MAKE GRAMMALECTE [{0}] at {1.hour:>2} h {1.minute:>2} min {1.second:>2} s ==============".format(sLang, oNow))
    #### READ CONFIGURATION
    print("> read configuration...")
    spLang = "gc_lang/" + sLang
 | > > | 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | 
                "grammalecte-server-options._global.ini", "grammalecte-server-options."+sLang+".ini", \
                "README.txt", "LICENSE.txt", "LICENSE.fr.txt"]:
        hZip.write(spf)
    hZip.writestr("setup.py", helpers.fileFile("gc_lang/fr/setup.py", dVars))
def copyGrammalectePyPackageInZipFile (hZip, spLangPack, sAddPath=""):
    "copy Grammalecte Python package in zip file"
    for sf in os.listdir("grammalecte"):
        if not os.path.isdir("grammalecte/"+sf):
            hZip.write("grammalecte/"+sf, sAddPath+"grammalecte/"+sf)
    for sf in os.listdir("grammalecte/graphspell"):
        if not os.path.isdir("grammalecte/graphspell/"+sf):
            hZip.write("grammalecte/graphspell/"+sf, sAddPath+"grammalecte/graphspell/"+sf)
    for sf in os.listdir("grammalecte/graphspell/_dictionaries"):
        if not os.path.isdir("grammalecte/graphspell/_dictionaries/"+sf):
            hZip.write("grammalecte/graphspell/_dictionaries/"+sf, sAddPath+"grammalecte/graphspell/_dictionaries/"+sf)
    for sf in os.listdir(spLangPack):
        if not os.path.isdir(spLangPack+"/"+sf):
            hZip.write(spLangPack+"/"+sf, sAddPath+spLangPack+"/"+sf)
def create (sLang, xConfig, bInstallOXT, bJavaScript):
    "make Grammalecte for project <sLang>"
    oNow = datetime.datetime.now()
    print("============== MAKE GRAMMALECTE [{0}] at {1.hour:>2} h {1.minute:>2} min {1.second:>2} s ==============".format(sLang, oNow))
    #### READ CONFIGURATION
    print("> read configuration...")
    spLang = "gc_lang/" + sLang
 | 
| ︙ | ︙ | |||
| 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | 
            print(sf, end=", ")
    print()
    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.write(dVars['gctests'])
    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)
    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)
    #### JAVASCRIPT
 | > | 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | 
            print(sf, end=", ")
    print()
    # TEST FILES
    with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
        hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
        hDstPy.write(dVars['gctests'])
        hDstPy.write("\n")
    createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)
    createServerOptions(sLang, dVars)
    createPackageZip(sLang, dVars, spLangPack)
    #### JAVASCRIPT
 | 
| ︙ | ︙ | |||
| 248 249 250 251 252 253 254 | 
        print()
        dVars["pluginsJS"] = sCodePlugins
        # options data struct
        dVars["dOptJavaScript"] = json.dumps(list(dVars["dOptJavaScript"].items()))
        dVars["dOptFirefox"] = json.dumps(list(dVars["dOptFirefox"].items()))
        dVars["dOptThunderbird"] = json.dumps(list(dVars["dOptThunderbird"].items()))
 | | | 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | 
        print()
        dVars["pluginsJS"] = sCodePlugins
        # options data struct
        dVars["dOptJavaScript"] = json.dumps(list(dVars["dOptJavaScript"].items()))
        dVars["dOptFirefox"] = json.dumps(list(dVars["dOptFirefox"].items()))
        dVars["dOptThunderbird"] = json.dumps(list(dVars["dOptThunderbird"].items()))
        # create folder
        spLangPack = "grammalecte-js/"+sLang
        helpers.createCleanFolder(spLangPack)
        # create files
        for sf in os.listdir("js_extension"):
            dVars[sf[:-3]] = open("js_extension/"+sf, "r", encoding="utf-8").read()
 | 
| ︙ | ︙ | |||
| 271 272 273 274 275 276 277 | 
        for sf in os.listdir(spLang+"/modules-js"):
            if not sf.startswith("gce_"):
                helpers.copyAndFileTemplate(spLang+"/modules-js/"+sf, spLangPack+"/"+sf, dVars)
                print(sf, end=", ")
        print()
        try:
 | | | > > | 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 | 
        for sf in os.listdir(spLang+"/modules-js"):
            if not sf.startswith("gce_"):
                helpers.copyAndFileTemplate(spLang+"/modules-js/"+sf, spLangPack+"/"+sf, dVars)
                print(sf, end=", ")
        print()
        try:
            buildjs = importlib.import_module("gc_lang."+sLang+".build")
        except ImportError:
            print("# No complementary builder <build.py> in folder gc_lang/"+sLang)
        else:
            buildjs.build(sLang, dVars, spLangPack)
    return dVars['version']
def copyGraphspellCore (bJavaScript=False):
    "copy Graphspell package in Grammalecte package"
    helpers.createCleanFolder("grammalecte/graphspell")
    dir_util.mkpath("grammalecte/graphspell/_dictionaries")
    for sf in os.listdir("graphspell"):
        if not os.path.isdir("graphspell/"+sf):
            file_util.copy_file("graphspell/"+sf, "grammalecte/graphspell")
    if bJavaScript:
        helpers.createCleanFolder("grammalecte-js/graphspell")
        dir_util.mkpath("grammalecte-js/graphspell/_dictionaries")
        dVars = {}
        for sf in os.listdir("js_extension"):
            dVars[sf[:-3]] = open("js_extension/"+sf, "r", encoding="utf-8").read()
        for sf in os.listdir("graphspell-js"):
            if not os.path.isdir("graphspell-js/"+sf):
                file_util.copy_file("graphspell-js/"+sf, "grammalecte-js/graphspell")
                helpers.copyAndFileTemplate("graphspell-js/"+sf, "grammalecte-js/graphspell/"+sf, dVars)
def copyGraphspellDictionaries (dVars, bJavaScript=False, bExtendedDict=False, bCommunityDict=False, bPersonalDict=False):
    "copy requested Graphspell dictionaries in Grammalecte package"
    dVars["dic_main_filename_py"] = ""
    dVars["dic_main_filename_js"] = ""
    dVars["dic_extended_filename_py"] = ""
    dVars["dic_extended_filename_js"] = ""
    dVars["dic_community_filename_py"] = ""
    dVars["dic_community_filename_js"] = ""
    dVars["dic_personal_filename_py"] = ""
 | 
| ︙ | ︙ | |||
| 331 332 333 334 335 336 337 338 339 | 
            file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries")
            dVars['dic_'+sType+'_filename_js'] = sFileName + '.json'
    dVars['dic_main_filename_py'] = dVars['dic_default_filename_py'] + ".bdic"
    dVars['dic_main_filename_js'] = dVars['dic_default_filename_js'] + ".json"
def buildDictionary (dVars, sType, bJavaScript=False):
    if sType == "main":
        spfLexSrc = dVars['lexicon_src']
 | > | | | | > | 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 | 
            file_util.copy_file(spfJSDic, "grammalecte-js/graphspell/_dictionaries")
            dVars['dic_'+sType+'_filename_js'] = sFileName + '.json'
    dVars['dic_main_filename_py'] = dVars['dic_default_filename_py'] + ".bdic"
    dVars['dic_main_filename_js'] = dVars['dic_default_filename_js'] + ".json"
def buildDictionary (dVars, sType, bJavaScript=False):
    "build binary dictionary for Graphspell from lexicons"
    if sType == "main":
        spfLexSrc = dVars['lexicon_src']
        lSfDictDst = dVars['dic_filenames'].split(",")
        lDicName = dVars['dic_name'].split(",")
        lFilter = dVars['dic_filter'].split(",")
        for sfDictDst, sDicName, sFilter in zip(lSfDictDst, lDicName, lFilter):
            lex_build.build(spfLexSrc, dVars['lang'], dVars['lang_name'], sfDictDst, bJavaScript, sDicName, sFilter, dVars['stemming_method'], int(dVars['fsa_method']))
    else:
        if sType == "extended":
            spfLexSrc = dVars['lexicon_extended_src']
            sfDictDst = dVars['dic_extended_filename']
            sDicName = dVars['dic_extended_name']
        elif sType == "community":
            spfLexSrc = dVars['lexicon_community_src']
            sfDictDst = dVars['dic_community_filename']
            sDicName = dVars['dic_community_name']
        elif sType == "personal":
            spfLexSrc = dVars['lexicon_personal_src']
            sfDictDst = dVars['dic_personal_filename']
            sDicName = dVars['dic_personal_name']
        lex_build.build(spfLexSrc, dVars['lang'], dVars['lang_name'], sfDictDst, bJavaScript, sDicName, "", dVars['stemming_method'], int(dVars['fsa_method']))
def main ():
    "build Grammalecte with requested options"
    print("Python: " + sys.version)
    xParser = argparse.ArgumentParser()
    xParser.add_argument("lang", type=str, nargs='+', help="lang project to generate (name of folder in /lang)")
    xParser.add_argument("-b", "--build_data", help="launch build_data.py (part 1 and 2)", action="store_true")
    xParser.add_argument("-bb", "--build_data_before", help="launch build_data.py (only part 1: before dictionary building)", action="store_true")
    xParser.add_argument("-ba", "--build_data_after", help="launch build_data.py (only part 2: before dictionary building)", action="store_true")
    xParser.add_argument("-d", "--dict", help="generate FSA dictionary", action="store_true")
 | 
| ︙ | ︙ | |||
| 401 402 403 404 405 406 407 | 
                xArgs.add_extended_dictionary = False
            if not dVars["lexicon_community_src"]:
                xArgs.add_community_dictionary = False
            if not dVars["lexicon_personal_src"]:
                xArgs.add_personal_dictionary = False
            # build data
 | | | | | | | | 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 | 
                xArgs.add_extended_dictionary = False
            if not dVars["lexicon_community_src"]:
                xArgs.add_community_dictionary = False
            if not dVars["lexicon_personal_src"]:
                xArgs.add_personal_dictionary = False
            # build data
            databuild = None
            if xArgs.build_data_before or xArgs.build_data_after:
                # lang data
                try:
                    databuild = importlib.import_module("gc_lang."+sLang+".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
            if databuild and xArgs.build_data_before:
                databuild.before('gc_lang/'+sLang, dVars, xArgs.javascript)
            if xArgs.dict:
                buildDictionary(dVars, "main", xArgs.javascript)
                if xArgs.add_extended_dictionary:
                    buildDictionary(dVars, "extended", xArgs.javascript)
                if xArgs.add_community_dictionary:
                    buildDictionary(dVars, "community", xArgs.javascript)
                if xArgs.add_personal_dictionary:
                    buildDictionary(dVars, "personal", xArgs.javascript)
            if databuild and xArgs.build_data_after:
                databuild.after('gc_lang/'+sLang, dVars, xArgs.javascript)
            # copy dictionaries from Graphspell
            copyGraphspellDictionaries(dVars, xArgs.javascript, xArgs.add_extended_dictionary, xArgs.add_community_dictionary, xArgs.add_personal_dictionary)
            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )
 | 
| ︙ | ︙ | |||
| 444 445 446 447 448 449 450 | 
                    if xArgs.tests:
                        xTestSuite = unittest.TestLoader().loadTestsFromModule(tests)
                        unittest.TextTestRunner().run(xTestSuite)
                    if xArgs.perf or xArgs.perf_memo:
                        hDst = open("./gc_lang/"+sLang+"/perf_memo.txt", "a", encoding="utf-8", newline="\n")  if xArgs.perf_memo  else None
                        tests.perf(sVersion, hDst)
 | | | < | | | | 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 | 
                    if xArgs.tests:
                        xTestSuite = unittest.TestLoader().loadTestsFromModule(tests)
                        unittest.TextTestRunner().run(xTestSuite)
                    if xArgs.perf or xArgs.perf_memo:
                        hDst = open("./gc_lang/"+sLang+"/perf_memo.txt", "a", encoding="utf-8", newline="\n")  if xArgs.perf_memo  else None
                        tests.perf(sVersion, hDst)
            # Firefox (obsolete)
            #if False:
            #    with helpers.cd("_build/xpi/"+sLang):
            #        spfFirefox = dVars['win_fx_dev_path']  if platform.system() == "Windows"  else dVars['linux_fx_dev_path']
            #        os.system('jpm run -b "' + spfFirefox + '"')
            if xArgs.web_ext or xArgs.firefox:
                with helpers.cd("_build/webext/"+sLang):
                    if xArgs.lint_web_ext:
                        os.system(r'web-ext lint -o text')
                    if xArgs.firefox:
                        # Firefox Developper edition
 | 
| ︙ | ︙ | 
Modified misc/grammalecte.sublime-syntax from [f7dfed6343] to [751faa3559].
| ︙ | ︙ | |||
| 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | 
    - match: '\b(-)?[0-9.]+\b'
      scope: constant.numeric
    # Bookmarks
    - match: '^!!.*|^\[\+\+\].*'
      scope: bookmark
    # Keywords are if, else.
    # Note that blackslashes don't need to be escaped within single quoted
    # strings in YAML. When using single quoted strings, only single quotes
    # need to be escaped: this is done by using two single quotes next to each
    # other.
    - match: '\b(?:if|else|and|or|not|in)\b'
      scope: keyword.python
    - match: '\b(?:True|False|None)\b'
      scope: constant.language
 | > > > > > > > > > > > > > | | | > > > > > > > > > > > > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | 
    - match: '\b(-)?[0-9.]+\b'
      scope: constant.numeric
    # Bookmarks
    - match: '^!!.*|^\[\+\+\].*'
      scope: bookmark
    # Bookmarks
    - match: '^GRAPH_NAME:.*'
      scope: bookmark
    # Graph
    - match: '^@@@@GRAPH: *(\w+) .*$'
      scope: graphline
      captures:
        1: string.graphname
    - match: '^@@@@(?:END_GRAPH .*$| *)'
      scope: graphline
    # Keywords are if, else.
    # Note that blackslashes don't need to be escaped within single quoted
    # strings in YAML. When using single quoted strings, only single quotes
    # need to be escaped: this is done by using two single quotes next to each
    # other.
    - match: '\b(?:if|else|and|or|not|in)\b'
      scope: keyword.python
    - match: '\b(?:True|False|None)\b'
      scope: constant.language
    - match: '\b(?:spell|morph|morphex|morphVC|stem|value|space_after|textarea0?\w*|before0?\w*|after0?\w*|word|option|define|define_from|select|exclude|analyse\w+|tag_\w+|apposition|is[A-Z]\w+|rewriteSubject|checkD\w+|getD\w+|has[A-Z]\w+|sugg[A-Z]\w+|switch[A-Z]\w+|ceOrCet|formatN\w+|mbUnit)\b'
      scope: entity.name.function
    - match: '\b(?:replace|endswith|startswith|search|upper|lower|capitalize|strip|rstrip|is(?:upper|lower|digit|title))\b'
      scope: support.function
    - match: '\becho\b'
      scope: support.function.debug
    - match: '\bre\b'
      scope: support.class
    # Regex rule option
    - match: '^__[\[<]([isu])[\]>](/\w+|)(\(\w+\)|)(![0-9]|)__|</?js>'
      scope: rule.options
      captures:
        1: rule.casing
        2: rule.optionname
        3: rule.rulename
        4: rule.priority
    # Graph rules option
    - match: '^__(\w+)(![0-9]|)__'
      scope: rule.options
      captures:
        1: rule.rulename2
        2: rule.priority
    - match: '/(\w+)/'
      scope: rule.actionoption
      captures:
        1: rule.optionname
    # Definitions and options
    - match: '^OPT(?:GROUP|LANG|PRIORITY)/|^OPTSOFTWARE:'
      scope: options.command
    - match: '^OPT(?:LABEL|)/'
      scope: options.parameter
 | 
| ︙ | ︙ | |||
| 82 83 84 85 86 87 88 | 
    # rule delimiters
    - match: '<<-|>>>'
      scope: keyword.action
    - match: '__also__'
      scope: keyword.condition.green
    - match: '__else__'
      scope: keyword.condition.red
 | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | 
    # rule delimiters
    - match: '<<-|>>>'
      scope: keyword.action
    - match: '__also__'
      scope: keyword.condition.green
    - match: '__else__'
      scope: keyword.condition.red
    - match: '-(\d*\.?(?::\.?\d+|))>>'
      scope: keyword.error
      captures:
        1: keyword.error.group
    - match: '~(\d*(?::\d+|))>>'
      scope: keyword.textprocessor
      captures:
        1: keyword.textprocessor.group
    - match: '=>>'
      scope: keyword.disambiguator
    - match: '/(\d*(?::\d+|))>>'
      scope: keyword.tag
      captures:
        1: keyword.tag.group
    - match: '%(\d*)>>'
      scope: keyword.tag
      captures:
        1: keyword.tag.group
    # Tokens
    - match: '(>)\w+'
      scope: string.lemma
      captures:
        1: keyword.valid
    - match: '(~)(?!(?:\d+(?::\d+|)|)>>)[^\s¬]*'
      scope: string.regex
      captures:
        1: keyword.valid
    - match: '(@)([^@\s¬]*)'
      scope: string.morph
      captures:
        1: keyword.valid
        2: string.morph.pattern
    - match: '(/)[\w-]+'
      scope: string.tag
      captures:
        1: keyword.valid
    - match: '(?<=[\[ |])([*])([^\s¬]+)'
      scope: string.morph
      captures:
        1: keyword.valid
        2: string.meta
    - match: '(¬)(\S+)'
      scope: string.morph
      captures:
        1: keyword.invalid
        2: string.morph.antipattern
    - match: '<(?:start|end)>'
      scope: string.token
    - match: '<>'
      scope: string.jumptoken
    # Escaped chars
    - match: '\\(?:\d+|w|d|b|n|s|t)'
      scope: constant.character.escape
    # URL
    - match: '\| ?https?://[\w./%?&=#+-]+'
      scope: string.other
    # Example errors
    - match: '{{.+?}}'
      scope: message.error
    # special chars
    - match: '[@=*^?¿!:+<>~]'
      scope: keyword.other
    - match: '\(\?(?:[:=!]|<!)|[(|)]'
      scope: keyword.parenthesis
    - match: '\[|[]{}]'
      scope: keyword.brackets
 | 
| ︙ | ︙ | 
Modified misc/grammalecte.tmTheme from [7305de87f8] to [a95cdb7606].
| ︙ | ︙ | |||
| 64 65 66 67 68 69 70 71 72 73 74 75 76 77 | <dict> <key>foreground</key> <string>#A0F0FF</string> <key>background</key> <string>#0050A0</string> </dict> </dict> <dict> <key>name</key> <string>String</string> <key>scope</key> <string>string</string> <key>settings</key> <dict> | > > > > > > > > > > > > > > > | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | <dict> <key>foreground</key> <string>#A0F0FF</string> <key>background</key> <string>#0050A0</string> </dict> </dict> <dict> <key>name</key> <string>Graphline</string> <key>scope</key> <string>graphline</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(0, 100%, 80%)</string> <key>background</key> <string>hsl(0, 100%, 20%</string> <key>fontStyle</key> <string>bold</string> </dict> </dict> <dict> <key>name</key> <string>String</string> <key>scope</key> <string>string</string> <key>settings</key> <dict> | 
| ︙ | ︙ | |||
| 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | <string>#F0F060</string> <key>background</key> <string>#602020</string> <key>fontStyle</key> <string>bold</string> </dict> </dict> <dict> <key>name</key> <string>Keyword textprocessor</string> <key>scope</key> <string>keyword.textprocessor</string> <key>settings</key> <dict> | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | <string>#F0F060</string> <key>background</key> <string>#602020</string> <key>fontStyle</key> <string>bold</string> </dict> </dict> <dict> <key>name</key> <string>Keyword tag</string> <key>scope</key> <string>keyword.tag</string> <key>settings</key> <dict> <key>foreground</key> <string>#FF70FF</string> <key>background</key> <string>#602060</string> <key>fontStyle</key> <string>bold</string> </dict> </dict> <dict> <key>name</key> <string>Keyword tag group</string> <key>scope</key> <string>keyword.tag.group</string> <key>settings</key> <dict> <key>foreground</key> <string>#F0B0F0</string> <key>background</key> <string>#602060</string> <key>fontStyle</key> <string>bold</string> </dict> </dict> <dict> <key>name</key> <string>Keyword textprocessor</string> <key>scope</key> <string>keyword.textprocessor</string> <key>settings</key> <dict> | 
| ︙ | ︙ | |||
| 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | <key>settings</key> <dict> <key>foreground</key> <string>#A0A0A0</string> </dict> </dict> <dict> <key>name</key> <string>Rule options</string> <key>scope</key> <string>rule.options</string> <key>settings</key> <dict> | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 | <key>settings</key> <dict> <key>foreground</key> <string>#A0A0A0</string> </dict> </dict> <dict> <key>name</key> <string>Keyword Valid</string> <key>scope</key> <string>keyword.valid</string> <key>settings</key> <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> <string>hsl(150, 100%, 80%)</string> <key>background</key> <string>hsl(150, 100%, 20%)</string> </dict> </dict> <dict> <key>name</key> <string>Keyword Invalid</string> <key>scope</key> <string>keyword.invalid</string> <key>settings</key> <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> <string>hsl(0, 100%, 80%)</string> <key>background</key> <string>hsl(0, 100%, 20%)</string> </dict> </dict> <dict> <key>name</key> <string>Rule options</string> <key>scope</key> <string>rule.options</string> <key>settings</key> <dict> | 
| ︙ | ︙ | |||
| 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 | <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> <string>#30C0F0</string> </dict> </dict> <dict> <key>name</key> <string>Rule option name</string> <key>scope</key> <string>rule.optionname</string> <key>settings</key> <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> | > > > > > > > > > > > > > > > | > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 | <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> <string>#30C0F0</string> </dict> </dict> <dict> <key>name</key> <string>Rule action option</string> <key>scope</key> <string>rule.actionoption</string> <key>settings</key> <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> <string>hsl(0, 50%, 50%)</string> <key>background</key> <string>hsl(330, 50%, 20%)</string> </dict> </dict> <dict> <key>name</key> <string>Rule option name</string> <key>scope</key> <string>rule.optionname</string> <key>settings</key> <dict> <key>fontStyle</key> <string>bold</string> <key>foreground</key> <string>hsl(330, 80%, 80%)</string> <key>background</key> <string>hsl(330, 60%, 20%)</string> </dict> </dict> <dict> <key>name</key> <string>Rule name</string> <key>scope</key> <string>rule.rulename</string> <key>settings</key> <dict> <key>fontStyle</key> <string>italic</string> <key>foreground</key> <string>#A0A0A0</string> </dict> </dict> <dict> <key>name</key> <string>Rule name</string> <key>scope</key> <string>rule.rulename2</string> <key>settings</key> <dict> <key>foreground</key> <string>#F0D080</string> </dict> </dict> <dict> <key>name</key> <string>Rule priority</string> <key>scope</key> <string>rule.priority</string> <key>settings</key> <dict> <key>foreground</key> <string>#F06060</string> </dict> </dict> <dict> <key>name</key> <string>String meta</string> <key>scope</key> <string>string.meta</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(270, 100%, 90%)</string> <key>background</key> <string>hsl(270, 100%, 40%)</string> </dict> </dict> <dict> <key>name</key> <string>String token</string> <key>scope</key> <string>string.token</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(240, 50%, 90%)</string> <key>background</key> <string>hsl(240, 50%, 40%)</string> </dict> </dict> <dict> <key>name</key> <string>String Jumptoken</string> <key>scope</key> <string>string.jumptoken</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(0, 50%, 90%)</string> <key>background</key> <string>hsl(10, 50%, 40%)</string> </dict> </dict> <dict> <key>name</key> <string>String lemma</string> <key>scope</key> <string>string.lemma</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(210, 100%, 80%)</string> <key>background</key> <string>hsl(210, 100%, 15%)</string> </dict> </dict> <dict> <key>name</key> <string>String tag</string> <key>scope</key> <string>string.tag</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(30, 100%, 90%)</string> <key>background</key> <string>hsl(30, 100%, 20%)</string> </dict> </dict> <dict> <key>name</key> <string>String regex</string> <key>scope</key> <string>string.regex</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(60, 100%, 80%)</string> <key>background</key> <string>hsl(60, 100%, 10%)</string> </dict> </dict> <dict> <key>name</key> <string>String morph pattern</string> <key>scope</key> <string>string.morph.pattern</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(150, 80%, 90%)</string> <key>background</key> <string>hsl(150, 80%, 10%)</string> </dict> </dict> <dict> <key>name</key> <string>String morph antipattern</string> <key>scope</key> <string>string.morph.antipattern</string> <key>settings</key> <dict> <key>foreground</key> <string>hsl(0, 80%, 90%)</string> <key>background</key> <string>hsl(0, 80%, 10%)</string> </dict> </dict> <dict> <key>name</key> <string>JavaScript Dollar</string> <key>scope</key> <string>variable.other.dollar.only.js</string> <key>settings</key> | 
| ︙ | ︙ |