Grammalecte: Changes On Branch 8d8f667d6938d550

Changes In Branch rg Through [8d8f667d69] Excluding Merge-Ins

This is equivalent to a diff from f2d8271145 to 8d8f667d69

2018-05-23
11:33		[graphspell][js][bug] return Array instead of Set check-in: a54634de77 user: olr tags: graphspell, rg
11:32		[graphspell][py] wrong comment check-in: 8d8f667d69 user: olr tags: graphspell, rg
10:29		[graphspell][js] data memorization check-in: e7244953ec user: olr tags: graphspell, rg
2018-05-16
16:22		[fr] pt: descente aux enfers/flambeaux check-in: b5310203be user: olr tags: trunk, fr
16:14		[build][core] rules graph: first draft check-in: 061252f41e user: olr tags: core, build, rg
11:58		[graphspell][bug] fix affixes occurrences calculation check-in: f2d8271145 user: olr tags: trunk, graphspell
2018-05-15
12:51		[fr] test contre faux positif check-in: f8bf9c3922 user: olr tags: trunk, fr

Added compile_rules_graph.py version [7c9c436423].

Added darg.py version [bf378d22b5].

Modified gc_core/py/lang_core/gc_engine.py from [72ecd7c680] to [29b43c054f].

Added gc_core/py/lang_core/gc_rules_graph.py version [e9a58f5498].

Added gc_core/py/lang_core/gc_sentence.py version [90cbca3aed].

Modified gc_lang/fr/modules/gce_analyseur.py from [39975de0ac] to [50ac148025].

Modified gc_lang/fr/modules/gce_suggestions.py from [79835965e4] to [818aeb6977].

Added gc_lang/fr/rules_graph.grx version [0c5fd71826].

Modified graphspell-js/spellchecker.js from [3df103d578] to [9fcf40c037].

Modified graphspell-js/tokenizer.js from [bdd895b918] to [9bd60cca8a].

Modified graphspell/spellchecker.py from [cbd22d2c4d] to [2c7f3d8dbe].

Modified graphspell/tokenizer.py from [17f452887e] to [b3cbfe75ea].

Modified make.py from [14e0172bf2] to [ff9ae5f2b3].

︙
8 9 10 11 12 13 14 15 16 17 18 19 20 21	8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24	+ + +	#import unicodedata from itertools import chain from ..graphspell.spellchecker import SpellChecker from ..graphspell.echo import echo from . import gc_options from ..graphspell.tokenizer import Tokenizer from .gc_rules_graph import dGraph __all__ = [ "lang", "locales", "pkg", "name", "version", "author", \ "load", "parse", "getSpellChecker", \ "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \ "ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ] __version__ = "${version}"
︙
31 32 33 34 35 36 37 ~~38 39~~ 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83	34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86	- - + - + - + + - +	_rules = None # module gc_rules # data _sAppContext = "" # what software is running _dOptions = None _aIgnoredRules = set() _oSpellChecker = None ~~_dAnalyses = {} # cache for data from dictionary~~ _oTokenizer = None #### Parsing def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False): "analyses the paragraph sText and returns list of errors" #sText = unicodedata.normalize("NFC", sText) aErrors = None ~~sAlt = sText~~ sRealText = sText dDA = {} # Disambiguisator. Key = position; value = list of morphologies dPriority = {} # Key = position; value = priority dOpt = _dOptions if not dOptions else dOptions # parse paragraph try: ~~sNew, aErrors = _proofread(sText, sAlt, 0, True, dDA, dPriority, sCountry, dOpt, bDebug, bContext)~~ sNew, aErrors = _proofread(sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bDebug, bContext) if sNew: sText = sNew except: raise # cleanup if " " in sText: sText = sText.replace(" ", ' ') # nbsp if " " in sText: sText = sText.replace(" ", ' ') # nnbsp if "'" in sText: sText = sText.replace("'", "’") if "‑" in sText: sText = sText.replace("‑", "-") # nobreakdash # parse sentences for iStart, iEnd in _getSentenceBoundaries(sText): if 4 < (iEnd - iStart) < 2000: dDA.clear() try: # regex parser ~~_, errs = _proofread(sText[iStart:iEnd], sAlt[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)~~ _, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext) aErrors.update(errs) except: raise return aErrors.values() # this is a view (iterable) def _getSentenceBoundaries 
(sText):
︙
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304	290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310	+ + +	_createError = _createDictError def load (sContext="Python"): global _oSpellChecker global _sAppContext global _dOptions global _oTokenizer try: _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}") _sAppContext = sContext _dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default _oTokenizer = _oSpellChecker.getTokenizer() _oSpellChecker.activateStorage() except: traceback.print_exc() def setOption (sOpt, bVal): if sOpt in _dOptions: _dOptions[sOpt] = bVal
︙
367 368 369 370 371 372 373 ~~374 375 376 377 378~~ 379 380 381 382 383 384 385 386 387 388 389 390 ~~391 392~~ 393 394 395 ~~396~~ 397 398 ~~399 400 401 402 403 404 405~~ 406 407 408 409 410 ~~411 412 413~~ 414 415 416 417 418 419 420 421 422 423 424 425 ~~426~~ 427 ~~428~~ 429 430 431 432 433 434 435 436 437 438 439 ~~440~~ ~~441 442~~ 443 444 445 ~~446 447~~ 448 449 450 451 ~~452~~ 453 454 455 ~~456~~ 457 458 459 ~~460~~ 461 ~~462 463 464 465 466 467 468 469~~ 470 471 472 473 474 475 476	373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466	- - - - - + + + + + - - + + + - + - - - - - - - - - - + - + + - - + - - + - - + + - + + - + - + - - - - - - - -	return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py") #### common functions # common regexes _zEndOfSentence = re.compile('([.?!:;…][ .?!… »”")]\|.$)') _zBeginOfParagraph = re.compile("^\W") _zEndOfParagraph = re.compile("\W$") _zNextWord = re.compile(" +(\w[\w-])") _zPrevWord = re.compile("(\w[\w-]) +$") _zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]\|.$)') _zBeginOfParagraph = re.compile(r"^\W") _zEndOfParagraph = re.compile(r"\W$") _zNextWord = re.compile(r" +(\w[\w-])") _zPrevWord = re.compile(r"(\w[\w-]) +$") def option (sOpt): "return True if option sOpt is active" return _dOptions.get(sOpt, False) def displayInfo (dDA, tWord): "for debugging: retrieve info of word" if not tWord: echo("> nothing to find") return True ~~~~if tWord[1] not in _dAnalyses and not _store~~MorphFr~~omFSA~~(tWord[1]): echo("> not in ~~FSA~~")~~ lMorph = _oSpellChecker.getMorph(tWord[1]) if not lMorph: echo("> not in dictionary") return True if tWord[0] in dDA: echo("DA: " + 
str(dDA[tWord[0]])) ~~echo("FSA: " + str(~~_dAnalyses[tW~~or~~d[1]]~~))~~ echo("FSA: " + str(lMorph)) return True ~~def _storeMorphFromFSA (sWord):~~ ~~"retrieves morphologies list from _oSpellChecker -> _dAnalyses"~~ ~~global _dAnalyses~~ ~~_dAnalyses[sWord] = _oSpellChecker.getMorph(sWord)~~ ~~return True if _dAnalyses[sWord] else False~~ def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False): "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)" if not tWord: return bNoWord ~~~~if tWord[1] not in _dAnalyses and not _storeMorphFromFSA(tWord[1]):~~ ~~return False~~ lMorph = dDA[tWord[0]] if tWord[0] in dDA else _~~dAna~~l~~yses[~~tWord[1]]~~ lMorph = dDA[tWord[0]] if tWord[0] in dDA else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False p = re.compile(sPattern) if bStrict: return all(p.search(s) for s in lMorph) return any(p.search(s) for s in lMorph) def morphex (dDA, tWord, sPattern, sNegPattern, bNoWord=False): "analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)" if not tWord: return bNoWord ~~if tWord[~~1] not~~ in ~~_dAnaly~~se~~s and not~~ _~~store~~Morph~~FromFSA~~(tWord[1]):~~ lMorph = dDA[tWord[0]] if tWord[0] in dDA else _oSpellChecker.getMorph(tWord[1]) if not lMorph: return False ~~lMorph = dDA[tWord[0]] if tWord[0] in dDA else _dAnalyses[tWord[1]]~~ # check negative condition np = re.compile(sNegPattern) if any(np.search(s) for s in lMorph): return False # search sPattern p = re.compile(sPattern) return any(p.search(s) for s in lMorph) def analyse (sWord, sPattern, bStrict=True): "analyse a word, return True if sPattern in morphologies (disambiguation off)" ~~~~if sWord not in _dAnalyses and not _store~~MorphFr~~omFSA~~(sWord):~~ lMorph = _oSpellChecker.getMorph(sWord) ~~~~return False~~ if not ~~_dAnalyses[sW~~ord]:~~ if not lMorph: return False p = re.compile(sPattern) if bStrict: ~~return 
all(p.search(s) for s in ~~_dAnalyses[sW~~ord]) return any(p.search(s) for s in ~~_dAnalyses[sW~~ord])~~ return all(p.search(s) for s in lMorph) return any(p.search(s) for s in lMorph) def analysex (sWord, sPattern, sNegPattern): "analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)" ~~~~if sWord not in _dAnalyses and not _store~~MorphFr~~omFSA~~(sWord):~~ lMorph = _oSpellChecker.getMorph(sWord) if not lMorph: return False # check negative condition np = re.compile(sNegPattern) ~~if any(np.search(s) for s in ~~_dAnalyses[sW~~ord]):~~ if any(np.search(s) for s in lMorph): return False # search sPattern p = re.compile(sPattern) ~~return any(p.search(s) for s in ~~_dAnalyses[sW~~ord])~~ return any(p.search(s) for s in lMorph) ~~def stem (sWord):~~ ~~"returns a list of sWord's stems"~~ ~~if not sWord:~~ ~~return []~~ ~~if sWord not in _dAnalyses and not _storeMorphFromFSA(sWord):~~ ~~return []~~ ~~return [ s[1:s.find(" ")] for s in _dAnalyses[sWord] ]~~ ## functions to get text outside pattern scope # warning: check compile_rules.py to understand how it works def nextword (s, iStart, n):
︙
532 533 534 535 536 537 538 ~~539~~ ~~540 541~~ 542 ~~543~~ 544 ~~545~~ 546 ~~547~~ 548 549 ~~550~~ 551 552 553 554 555 556 557 558 ~~559~~ ~~560 561~~ 562 ~~563~~ 564 ~~565~~ 566 ~~567~~ 568 569 ~~570~~ 571 572 573 574 575 ~~576~~ 577 578 579 580 581 582 583 584 585	522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570	- + - - + - + - + - - - + - - + - + - + - - - + +	#### Disambiguator def select (dDA, nPos, sWord, sPattern, lDefault=None): if not sWord: return True if nPos in dDA: return True ~~~~if sWord not in _dAnalyses and not _store~~MorphFr~~omFSA~~(sWord):~~ lMorph = _oSpellChecker.getMorph(sWord) ~~~~return True~~ if len(~~_dAnalyses[sW~~ord]) == 1:~~ if not lMorph or len(lMorph) == 1: return True ~~lSelect = [ sMorph for sMorph in ~~_dAnalyses[sW~~ord] if re.search(sPattern, sMorph) ]~~ lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ] if lSelect: ~~if len(lSelect) != len(~~_dAnalyses[sW~~ord]):~~ if len(lSelect) != len(lMorph): dDA[nPos] = lSelect ~~#echo("= "+sWord+" "+str(dDA.get(nPos, "null")))~~ elif lDefault: dDA[nPos] = lDefault ~~#echo("= "+sWord+" "+str(dDA.get(nPos, "null")))~~ return True def exclude (dDA, nPos, sWord, sPattern, lDefault=None): if not sWord: return True if nPos in dDA: return True ~~~~if sWord not in _dAnalyses and not _store~~MorphFr~~omFSA~~(sWord):~~ lMorph = _oSpellChecker.getMorph(sWord) ~~~~return True~~ if len(~~_dAnalyses[sW~~ord]) == 1:~~ if not lMorph or len(lMorph) == 1: return True ~~lSelect = [ sMorph for sMorph in ~~_dAnalyses[sW~~ord] if not re.search(sPattern, sMorph) ]~~ lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ] if lSelect: ~~if len(lSelect) != len(~~_dAnalyses[sW~~ord]):~~ if len(lSelect) != len(lMorph): dDA[nPos] = lSelect ~~#echo("= "+sWord+" "+str(dDA.get(nPos, "null")))~~ elif lDefault: dDA[nPos] = lDefault 
~~#echo("= "+sWord+" "+str(dDA.get(nPos, "null")))~~ return True def define (dDA, nPos, lMorph): dDA[nPos] = lMorph ~~#echo("= "+str(nPos)+" "+str(dDA[nPos]))~~ return True #### GRAMMAR CHECKER PLUGINS ${plugins} #### CALLABLES (generated code) ${callables}

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24	1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24	- + - +	#### GRAMMAR CHECKING ENGINE PLUGIN: Suggestion mechanisms from . import conj from . import mfsp from . import phonet ## Verbs def suggVerb (sFlex, sWho, funcSugg2=None): aSugg = set() ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: # we get the tense aTense = set() ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []): # we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~~~ for sMorph in _oSpellChecker.getMorph(sFlex): for m in re.finditer(">"+sStem+" .*?(:(?:Y\|I[pqsf]\|S[pq]\|K\|P))", sMorph): # stem must be used in regex to prevent confusion between different verbs (e.g. sauras has 2 stems: savoir and saurer) if m: if m.group(1) == ":Y": aTense.add(":Ip") aTense.add(":Iq") aTense.add(":Is")
︙
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52	38 39 40 41 42 43 44 45 46 47 48 49 50 51 52	- +	if aSugg: return "\|".join(aSugg) return "" def suggVerbPpas (sFlex, sWhat=None): aSugg = set() ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: if not sWhat: aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3")) aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
︙
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 ~~113~~ 114 115 116 117 118 119 120	81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120	- + - + - +	if aSugg: return "\|".join(aSugg) return "" def suggVerbTense (sFlex, sTense, sWho): aSugg = set() ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): if conj.hasConj(sStem, sTense, sWho): aSugg.add(conj.getConj(sStem, sTense, sWho)) if aSugg: return "\|".join(aSugg) return "" def suggVerbImpe (sFlex): aSugg = set() ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: if conj._hasConjWithTags(tTags, ":E", ":2s"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2s")) if conj._hasConjWithTags(tTags, ":E", ":1p"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":1p")) if conj._hasConjWithTags(tTags, ":E", ":2p"): aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2p")) if aSugg: return "\|".join(aSugg) return "" def suggVerbInfi (sFlex): ~~return "\|".join([ sStem for sStem in ~~stem~~(sFlex) if conj.isVerb(sStem) ])~~ return "\|".join([ sStem for sStem in _oSpellChecker.getLemma(sFlex) if conj.isVerb(sStem) ]) _dQuiEst = { "je": ":1s", "j’": ":1s", "j’en": ":1s", "j’y": ":1s", \ "tu": ":2s", "il": ":3s", "on": ":3s", "elle": ":3s", "nous": ":1p", "vous": ":2p", "ils": ":3p", "elles": ":3p" } _lIndicatif = [":Ip", ":Iq", ":Is", ":If"] _lSubjonctif = [":Sp", ":Sq"]
︙
129 130 131 132 133 134 135 ~~136~~ 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 ~~152~~ 153 ~~154~~ 155 156 157 158 159 160 161	129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162	- + + - + - +	return "" sWho = _dQuiEst.get(sSuj.lower(), None) if not sWho: if sSuj[0:1].islower(): # pas un pronom, ni un nom propre return "" sWho = ":3s" aSugg = set() ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): tTags = conj._getTags(sStem) if tTags: for sTense in lMode: if conj._hasConjWithTags(tTags, sTense, sWho): aSugg.add(conj._getConjWithTags(sStem, tTags, sTense, sWho)) if aSugg: return "\|".join(aSugg) return "" ## Nouns and adjectives def suggPlur (sFlex, sWordToAgree=None): "returns plural forms assuming sFlex is singular" if sWordToAgree: lMorph = _oSpellChecker.getMorph(sFlex) ~~if ~~sWordToAgree not in _dAnalyses and not _store~~Morph~~FromFSA(sWordToAgree)~~:~~ if not lMorph: return "" ~~sGender = cr.getGender(~~_dAnalyses.get(sW~~or~~dToAgree, [])~~)~~ sGender = cr.getGender(lMorph) if sGender == ":m": return suggMasPlur(sFlex) elif sGender == ":f": return suggFemPlur(sFlex) aSugg = set() if "-" not in sFlex: if sFlex.endswith("l"):
︙
189 190 191 192 193 194 195 ~~196~~ 197 ~~198~~ 199 200 201 202 203 204 205	190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205	- - +	if aSugg: return "\|".join(aSugg) return "" def suggMasSing (sFlex, bSuggSimil=False): "returns masculine singular forms" ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = set() ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if not ":V" in sMorph: # not a verb if ":m" in sMorph or ":e" in sMorph: aSugg.add(suggSing(sFlex)) else: sStem = cr.getLemmaOfMorph(sMorph) if mfsp.isFemForm(sStem):
︙
217 218 219 220 221 222 223 ~~224~~ 225 ~~226~~ 227 228 229 230 231 232 233	217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232	- - +	if aSugg: return "\|".join(aSugg) return "" def suggMasPlur (sFlex, bSuggSimil=False): "returns masculine plural forms" ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = set() ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if not ":V" in sMorph: # not a verb if ":m" in sMorph or ":e" in sMorph: aSugg.add(suggPlur(sFlex)) else: sStem = cr.getLemmaOfMorph(sMorph) if mfsp.isFemForm(sStem):
︙
248 249 250 251 252 253 254 ~~255~~ 256 ~~257~~ 258 259 260 261 262 263 264	247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262	- - +	if aSugg: return "\|".join(aSugg) return "" def suggFemSing (sFlex, bSuggSimil=False): "returns feminine singular forms" ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = set() ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if not ":V" in sMorph: # not a verb if ":f" in sMorph or ":e" in sMorph: aSugg.add(suggSing(sFlex)) else: sStem = cr.getLemmaOfMorph(sMorph) if mfsp.isFemForm(sStem):
︙
274 275 276 277 278 279 280 ~~281~~ 282 ~~283~~ 284 285 286 287 288 289 290	272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287	- - +	if aSugg: return "\|".join(aSugg) return "" def suggFemPlur (sFlex, bSuggSimil=False): "returns feminine plural forms" ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = set() ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if not ":V" in sMorph: # not a verb if ":f" in sMorph or ":e" in sMorph: aSugg.add(suggPlur(sFlex)) else: sStem = cr.getLemmaOfMorph(sMorph) if mfsp.isFemForm(sStem):
︙
299 300 301 302 303 304 305 ~~306~~ 307 308 309 310 311 312 313 314 ~~315~~ 316 317 318 319 320 321 322 323 324 ~~325~~ 326 327 ~~328~~ 329 330 331 332 333 334 335 336 337 338 339 340 341 342 ~~343~~ 344 345 346 347 348 ~~349~~ 350 351 352 353 354 355 356 357 358 359 ~~360~~ 361 ~~362~~ 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 ~~378~~ 379 ~~380~~ 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 ~~397 398~~ 399 400 401 402 403 404 405	296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398	- + - + - - + - + - + - - + - - + - - +	aSugg.add(e) if aSugg: return "\|".join(aSugg) return "" def hasFemForm (sFlex): ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q3"): return True if phonet.hasSimil(sFlex, ":f"): return True return False def hasMasForm (sFlex): ~~for sStem in ~~stem~~(sFlex):~~ for sStem in _oSpellChecker.getLemma(sFlex): if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q1"): # what has a feminine form also has a masculine form return True if phonet.hasSimil(sFlex, ":m"): return True return False def switchGender (sFlex, bPlur=None): ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = set() if bPlur == None: ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if ":f" in sMorph: if ":s" in sMorph: aSugg.add(suggMasSing(sFlex)) elif ":p" in sMorph: aSugg.add(suggMasPlur(sFlex)) elif ":m" in sMorph: if ":s" in sMorph: aSugg.add(suggFemSing(sFlex)) elif ":p" in sMorph: 
aSugg.add(suggFemPlur(sFlex)) else: aSugg.add(suggFemSing(sFlex)) aSugg.add(suggFemPlur(sFlex)) elif bPlur: ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if ":f" in sMorph: aSugg.add(suggMasPlur(sFlex)) elif ":m" in sMorph: aSugg.add(suggFemPlur(sFlex)) else: ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if ":f" in sMorph: aSugg.add(suggMasSing(sFlex)) elif ":m" in sMorph: aSugg.add(suggFemSing(sFlex)) if aSugg: return "\|".join(aSugg) return "" def switchPlural (sFlex): ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = set() ~~for sMorph in _~~dAna~~l~~yses~~.get(sFlex~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sFlex): if ":s" in sMorph: aSugg.add(suggPlur(sFlex)) elif ":p" in sMorph: aSugg.add(suggSing(sFlex)) if aSugg: return "\|".join(aSugg) return "" def hasSimil (sWord, sPattern=None): return phonet.hasSimil(sWord, sPattern) def suggSimil (sWord, sPattern=None, bSubst=False): "return list of words phonetically similar to sWord and whom POS is matching sPattern" ~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ aSugg = phonet.selectSimil(sWord, sPattern) ~~for sMorph in _~~dAna~~l~~yses~~.get(sWord~~, []~~):~~ for sMorph in _oSpellChecker.getMorph(sWord): aSugg.update(conj.getSimil(sWord, sMorph, bSubst)) break if aSugg: return "\|".join(aSugg) return "" def suggCeOrCet (sWord): if re.match("(?i)[aeéèêiouyâîï]", sWord): return "cet" if sWord[0:1] == "h" or sWord[0:1] == "H": return "ce\|cet" return "ce" def suggLesLa (sWord): ~~~~# we don’t check if word exists in _dAnalyses, for it is assumed it has been done before~~ if any( ":p" in sMorph for sMorph in _~~dAna~~l~~yses~~.get(sWord~~, []~~) ):~~ if any( ":p" in sMorph for sMorph in _oSpellChecker.getMorph(sWord) ): return "les\|la" return "la" _zBinary = re.compile("^[01]+$") def formatNumber (s):
︙

︙
39 40 41 42 43 44 45 46 47 48 49 50 51 52	39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56	+ + + +	this.oExtendedDic = this._loadDictionary(extentedDic, sPath); this.oCommunityDic = this._loadDictionary(communityDic, sPath); this.oPersonalDic = this._loadDictionary(personalDic, sPath); this.bExtendedDic = Boolean(this.oExtendedDic); this.bCommunityDic = Boolean(this.oCommunityDic); this.bPersonalDic = Boolean(this.oPersonalDic); this.oTokenizer = null; // storage this.bStorage = false; this._dMorphologies = new Map(); // key: flexion, value: list of morphologies this._dLemmas = new Map(); // key: flexion, value: list of lemmas } _loadDictionary (dictionary, sPath="", bNecessary=false) { // returns an IBDAWG object if (!dictionary) { return null; }
︙
130 131 132 133 134 135 136 137 138 139 140 141 142 143	134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163	+ + + + + + + + + + + + + + + +	this.bCommunityDic = false; } deactivatePersonalDictionary () { this.bPersonalDic = false; } // Storage activateStorage () { this.bStorage = true; } deactivateStorage () { this.bStorage = false; } clearStorage () { this._dLemmas.clear(); this._dMorphologies.clear(); } // parse text functions parseParagraph (sText) { if (!this.oTokenizer) { this.loadTokenizer(); }
︙
201 202 203 204 205 206 207 ~~208~~ 209 ~~210~~ 211 212 ~~213~~ 214 215 ~~216~~ 217 ~~218~~ 219 220 221 222 223 224 225	221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263	+ + + - + - + - + - + + + + + - + + + + + + + + + + + +	return true; } return false; } getMorph (sWord) { // retrieves morphologies list, different casing allowed if (this.bStorage && this._dMorphologies.has(sWord)) { return this._dMorphologies.get(sWord); } ~~let l~~Result~~ = this.oMainDic.getMorph(sWord);~~ let lMorph = this.oMainDic.getMorph(sWord); if (this.bExtendedDic) { ~~l~~Result~~.push(...this.oExtendedDic.getMorph(sWord));~~ lMorph.push(...this.oExtendedDic.getMorph(sWord)); } if (this.bCommunityDic) { ~~l~~Result~~.push(...this.oCommunityDic.getMorph(sWord));~~ lMorph.push(...this.oCommunityDic.getMorph(sWord)); } if (this.bPersonalDic) { ~~l~~Result~~.push(...this.oPersonalDic.getMorph(sWord));~~ lMorph.push(...this.oPersonalDic.getMorph(sWord)); } if (this.bStorage) { this._dMorphologies.set(sWord, lMorph); this._dLemmas.set(sWord, new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(0, sMorph.indexOf(" ")); }))); } ~~return l~~Result~~;~~ return lMorph; } getLemma (sWord) { // retrieves lemmas if (this.bStorage) { if (!this._dLemmas.has(sWord)) { this.getMorph(sWord); } return this._dLemmas.get(sWord); } return new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(0, sMorph.indexOf(" ")); })); } * suggest (sWord, nSuggLimit=10) { // generator: returns 1, 2 or 3 lists of suggestions yield this.oMainDic.suggest(sWord, nSuggLimit); if (this.bExtendedDic) { yield this.oExtendedDic.suggest(sWord, nSuggLimit);
︙

︙
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44	14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44	- + - +	const aTkzPatterns = { // All regexps must start with ^. "default": [ [/^[  \t]+/, 'SPACE'], [/^\/(?:~\|bin\|boot\|dev\|etc\|home\|lib\|mnt\|opt\|root\|sbin\|tmp\|usr\|var\|Bureau\|Documents\|Images\|Musique\|Public\|Téléchargements\|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)/, 'FOLDERUNIX'], [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)\|)\|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)/, 'FOLDERWIN'], ~~[/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],~~ [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]/, 'SEPARATOR'], [/^[A-Z][.][A-Z][.](?:[A-Z][.])/, 'ACRONYM'], [/^(?:https?:\/\/\|www[.]\|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.\/?&!%=+"'@$#-]+/, 'LINK'], [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+.?>\|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+ >/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;\|)/, 'HTMLENTITY'], [/^\d\d?h\d\d\b/, 'HOUR'], [/^-?\d+(?:[.,]\d+\|)/, 'NUM'], [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+)/, 'WORD'] ], "fr": [ [/^[  \t]+/, 'SPACE'], [/^\/(?:~\|bin\|boot\|dev\|etc\|home\|lib\|mnt\|opt\|root\|sbin\|tmp\|usr\|var\|Bureau\|Documents\|Images\|Musique\|Public\|Téléchargements\|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)/, 'FOLDERUNIX'], [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)\|)\|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)/, 'FOLDERWIN'], ~~[/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]+/, 'SEPARATOR'],~~ [/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]/, 'SEPARATOR'], [/^[A-Z][.][A-Z][.](?:[A-Z][.])/, 'ACRONYM'], 
[/^(?:https?:\/\/\|www[.]\|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.\/?&!%=+"'@$#-]+/, 'LINK'], [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+/, 'TAG'], [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+.?>\|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+ *>/, 'HTML'], [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'], [/^&\w+;(?:\w+;\|)/, 'HTMLENTITY'], [/^(?:l\|d\|n\|m\|t\|s\|j\|c\|ç\|lorsqu\|puisqu\|jusqu\|quoiqu\|qu)['’`]/i, 'ELPFX'],
︙
58 59 60 61 62 63 64 65 66 67 68 69 70 71 ~~72 73 74 75 76~~ ~~77 78~~ 79 80 81 82 83 84 85 ~~86 87~~ 88 89 90 91 92 93 94 95	58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91	- + - + + - + - - - - - + - - + - - + +	this.sLang = "default"; } this.aRules = aTkzPatterns[this.sLang]; } * genTokens (sText) { let m; ~~let i = 0;~~ let iNext = 0; while (sText) { ~~let nCut = 1;~~ let iCut = 1; let iToken = 0; for (let [zRegex, sType] of this.aRules) { try { if ((m = zRegex.exec(sText)) !== null) { ~~if ~~(sType =~~= ~~'SEPARATOR') {~~~~ iToken += 1; ~~for (let c of m[0]) {~~ ~~yield { "sType": sType, "sValue": c, "nStart": i, "nEnd": i + m[0].length }~~ } ~~} else {~~ yield { "sType": sType, "sValue": m[0], "nStart": i, "nEnd": i + m[0].length } yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length } ~~} nCut = m[0].length;~~ iCut = m[0].length; break; } } catch (e) { helpers.logerror(e); } } ~~i ~~+= n~~Cut; sText = sText.slice(nCut);~~ iNext += iCut; sText = sText.slice(iCut); } } } if (typeof(exports) !== 'undefined') { exports.Tokenizer = Tokenizer; }

︙
32 33 34 35 36 37 38 39 40 41 42 43 44 45	32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49	+ + + +	self.oExtendedDic = self._loadDictionary(sfExtendedDic) self.oCommunityDic = self._loadDictionary(sfCommunityDic) self.oPersonalDic = self._loadDictionary(sfPersonalDic) self.bExtendedDic = bool(self.oExtendedDic) self.bCommunityDic = bool(self.oCommunityDic) self.bPersonalDic = bool(self.oPersonalDic) self.oTokenizer = None # storage self.bStorage = False self._dMorphologies = {} # key: flexion, value: list of morphologies self._dLemmas = {} # key: flexion, value: list of lemmas def _loadDictionary (self, source, bNecessary=False): "returns an IBDAWG object" if not source: return None try: return ibdawg.IBDAWG(source)
︙
95 96 97 98 99 100 101 102 103 104 105 106 107 108	99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125	+ + + + + + + + + + + + +	def deactivateCommunityDictionary (self): self.bCommunityDic = False def deactivatePersonalDictionary (self): self.bPersonalDic = False # Storage def activateStorage (self): self.bStorage = True def deactivateStorage (self): self.bStorage = False def clearStorage (self): self._dLemmas.clear() self._dMorphologies.clear() # parse text functions def parseParagraph (self, sText, bSpellSugg=False): if not self.oTokenizer: self.loadTokenizer() aSpellErrs = []
︙
167 168 169 170 171 172 173 ~~174~~ 175 ~~176~~ 177 ~~178~~ 179 ~~180 181~~ 182 183 184 185 186 187 188 189 190	184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217	+ + - + - + - + - - + + + + + + + + + +	return True if self.bPersonalDic and self.oPersonalDic.lookup(sWord): return True return False def getMorph (self, sWord): "retrieves morphologies list, different casing allowed" if self.bStorage and sWord in self._dMorphologies: return self._dMorphologies[sWord] ~~l~~Result~~ = self.oMainDic.getMorph(sWord)~~ lMorph = self.oMainDic.getMorph(sWord) if self.bExtendedDic: ~~l~~Result~~.extend(self.oExtendedDic.getMorph(sWord))~~ lMorph.extend(self.oExtendedDic.getMorph(sWord)) if self.bCommunityDic: ~~l~~Result~~.extend(self.oCommunityDic.getMorph(sWord))~~ lMorph.extend(self.oCommunityDic.getMorph(sWord)) if self.bPersonalDic: ~~l~~Result~~.extend(self.oPersonalDic.getMorph(sWord)) return l~~Result~~~~ lMorph.extend(self.oPersonalDic.getMorph(sWord)) if self.bStorage: self._dMorphologies[sWord] = lMorph self._dLemmas[sWord] = set([ s[1:s.find(" ")] for s in lMorph ]) return lMorph def getLemma (self, sWord): "retrieves lemmas" if self.bStorage: if sWord not in self._dLemmas: self.getMorph(sWord) return self._dLemmas[sWord] return set([ s[1:s.find(" ")] for s in self.getMorph(sWord) ]) def suggest (self, sWord, nSuggLimit=10): "generator: returns 1, 2 or 3 lists of suggestions" yield self.oMainDic.suggest(sWord, nSuggLimit) if self.bExtendedDic: yield self.oExtendedDic.suggest(sWord, nSuggLimit)
︙


























































































































































































































































































# Create a Direct Acyclic Rule Graph (DARG)

import re
import traceback
import json
import darg


dDEF = {}           # definitions read from "DEF:" lines: "{name}" -> value
dACTIONS = {}       # action identifier -> action description (list built by createAction)
lFUNCTIONS = []     # (function name, source code) pairs collected while parsing the rules


def printBookmark (nLevel, sComment, nLine):
    "print a bookmark found in the rules file (<nLevel> is the indentation level, <nLine> the line number)"
    print("  {:>6}:  {}".format(nLine, "  " * nLevel + sComment))


def prepareFunction (s):
    "translate the shortcuts of the rules language found in <s> into Python source code and return it"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"isStart *\(\)", 'before(["<START>", ","])', s)
    s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
    s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
    s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
    s = re.sub(r"(morph|morphex|displayInfo)[(][\\](\d+)", '\\1(lToken[\\2])', s)
    s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)
    s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s)                           # textarea_chk1(s)
    s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    # every remaining backslash reference becomes an access to the token list
    s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
    return s


def changeReferenceToken (s, dPos):
    "replace in <s> each backslash group reference by a reference to the token index found in <dPos> (group number -> token index)"
    def _replaceReference (m):
        nGroup = int(m.group(1))
        if nGroup not in dPos:
            # unknown group: left untouched, the caller reports wrong groups
            return m.group(0)
        return "\\" + str(dPos[nGroup])
    # One single pass: a reference already rewritten must never be rewritten again
    # (with successive str.replace, "\2" -> "\1" would then be turned into "\0").
    return re.sub(r"\\(\d+)", _replaceReference, s)


def createRule (iLine, sRuleName, sTokenLine, sActions, nPriority):
    """generator: for each action of the rule, register the action in dACTIONS
       and yield the list of tokens followed by "##<line number>" and the action identifier"""
    # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
    lToken = sTokenLine.split()

    # Calculate positions: a token written between parentheses is a group
    dPos = {}       # group number (from 1) -> index of the token in lToken
    nGroup = 0
    for i, sToken in enumerate(lToken):
        if sToken.startswith("(") and sToken.endswith(")"):
            lToken[i] = sToken[1:-1]
            nGroup += 1
            dPos[nGroup] = i

    # Parse actions
    for nAction, sAction in enumerate(sActions.split(" <<- ")):
        if sAction.strip():
            sActionId = sRuleName + "_a" + str(nAction)
            aAction = createAction(sActionId, sAction, nGroup, nPriority, dPos)
            if aAction:
                dACTIONS[sActionId] = aAction
                lResult = list(lToken)
                lResult.extend(["##"+str(iLine), sActionId])
                yield lResult


def createAction (sIdAction, sAction, nGroup, nPriority, dPos):
    """parse the action <sAction> and return its description as a list, or None if the action is invalid
       error (-):           [condition, "-", suggestion, start, end, priority, message, URL]
       text processor (~):  [condition, "~", replacement, start, end]
       disambiguator (=):   [condition, "=", function name]
       Functions to generate are appended to lFUNCTIONS."""
    m = re.search("([-~=])(\\d+|)(:\\d+|)>> ", sAction)
    if not m:
        print(" # Error. No action found at: ", sIdAction)
        print(" ==", sAction, "==")
        return None

    # Condition
    sCondition = sAction[:m.start()].strip()
    if sCondition:
        # references must be remapped before prepareFunction() consumes them
        sCondition = changeReferenceToken(sCondition, dPos)
        sCondition = prepareFunction(sCondition)
        lFUNCTIONS.append(("g_c_"+sIdAction, sCondition))
        sCondition = "g_c_"+sIdAction
    else:
        sCondition = ""

    # Action
    cAction = m.group(1)
    sAction = sAction[m.end():].strip()
    sAction = changeReferenceToken(sAction, dPos)
    iStartAction = int(m.group(2))  if m.group(2)  else 0
    iEndAction = int(m.group(3)[1:])  if m.group(3)  else iStartAction
    if nGroup:
        try:
            iStartAction = dPos[iStartAction]
            iEndAction = dPos[iEndAction]
        except KeyError:
            # no index given, or index of a group which does not exist
            print("# Error. Wrong groups in: " + sIdAction)

    # NOTE(review): the checks below compare references with nGroup, but at this point
    # references have been remapped to token indexes -- confirm what should be compared.
    if cAction == "-":
        ## error
        iMsg = sAction.find(" # ")
        if iMsg == -1:
            sMsg = "# Error. Error message not found."
            sURL = ""
            print(sMsg + " Action id: " + sIdAction)
        else:
            sMsg = sAction[iMsg+3:].strip()
            sAction = sAction[:iMsg].strip()
            sURL = ""
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                lFUNCTIONS.append(("g_m_"+sIdAction, sMsg))
                for x in re.finditer(r"group[(](\d+)[)]", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                sMsg = "=g_m_"+sIdAction
            else:
                for x in re.finditer(r"\\(\d+)", sMsg):
                    if int(x.group(1)) > nGroup:
                        print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
                if re.search("[.]\\w+[(]", sMsg):
                    print("# Error in message at line " + sIdAction + ": This message looks like code. Line should begin with =")

    if sAction[0:1] == "=" or cAction == "=":
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
        sAction = prepareFunction(sAction)
        for x in re.finditer(r"group[(](\d+)[)]", sAction):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
    else:
        for x in re.finditer(r"\\(\d+)", sAction):
            if int(x.group(1)) > nGroup:
                print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
        if re.search("[.]\\w+[(]|sugg\\w+[(]", sAction):
            print("# Error in action at line " + sIdAction + ": This action looks like code. Line should begin with =")

    if cAction == "-":
        ## error detected --> suggestion
        if not sAction:
            print("# Error in action at line " + sIdAction + ": This action is empty.")
        if sAction[0:1] == "=":
            lFUNCTIONS.append(("g_s_"+sIdAction, sAction[1:]))
            sAction = "=g_s_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sIdAction + ": The message is empty.")
        return [sCondition, cAction, sAction, iStartAction, iEndAction, nPriority, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if not sAction:
            print("# Error in action at line " + sIdAction + ": This action is empty.")
        if sAction[0:1] == "=":
            lFUNCTIONS.append(("g_p_"+sIdAction, sAction[1:]))
            sAction = "=g_p_"+sIdAction
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if not sAction:
            print("# Error in action at line " + sIdAction + ": This action is empty.")
        lFUNCTIONS.append(("g_d_"+sIdAction, sAction))
        sAction = "g_d_"+sIdAction
        return [sCondition, cAction, sAction]
    elif cAction == ">":
        ## no action, break loop if condition is False
        # NOTE(review): ">" is not accepted by the regex at the top of this function,
        # so this branch cannot be reached at the moment.
        return [sCondition, cAction, ""]
    else:
        print("# Unknown action at line " + sIdAction)
        return None


def make (spLang, sLang, bJavaScript):
    "compile rules, returns a dictionary of values"
    # for clarity purpose, don’t create any file here

    print("> read graph rules file...")
    try:
        with open(spLang + "/rules_graph.grx", 'r', encoding="utf-8") as hSrc:
            lRules = hSrc.readlines()
    except OSError:
        print("Error. Rules file in project [" + sLang + "] not found.")
        exit()

    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print(" parsing rules...")
    lRuleLine = []      # (line number, rule name, token line, actions, priority)
    lTest = []
    lTokenLine = []     # token lines of the rule being read: [line number, token line]
    sActions = ""
    sRuleName = ""
    nPriority = 4

    for i, sLine in enumerate(lRules, 1):
        sLine = sLine.rstrip()
        if "\t" in sLine:
            print("Error. Tabulation at line: ", i)
            break
        if sLine.startswith('#END'):
            printBookmark(0, "BREAK BY #END", i)
            break
        elif sLine.startswith("#"):
            pass
        elif sLine.startswith("DEF:"):
            m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
            if m:
                dDEF["{"+m.group(1)+"}"] = m.group(2)
            else:
                print("Error in definition: ", end="")
                print(sLine.strip())
        elif sLine.startswith("TEST:"):
            lTest.append("{:<8}".format(i) + " " + sLine[5:].strip())
        elif sLine.startswith("TODO:"):
            pass
        elif sLine.startswith("!!"):
            m = re.search("^!!+", sLine)
            nExMk = len(m.group(0))
            if sLine[nExMk:].strip():
                printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                nPriority = int(m.group(2)[1:]) if m.group(2) else 4
            else:
                print("Error at rule group: ", sLine, " -- line:", i)
                break
        elif re.match("[ ]*$", sLine):
            # empty line to end merging
            for iTokenLine, sTokenLine in lTokenLine:
                lRuleLine.append((iTokenLine, sRuleName, sTokenLine, sActions, nPriority))
            lTokenLine = []
            sActions = ""
            sRuleName = ""
            nPriority = 4
        elif sLine.startswith((" ")):
            # actions
            sActions += " " + sLine.strip()
        else:
            lTokenLine.append([i, sLine.strip()])
    else:
        # the whole file has been read (no break): the last rule is still pending
        # when the file does not end with an empty line
        for iTokenLine, sTokenLine in lTokenLine:
            lRuleLine.append((iTokenLine, sRuleName, sTokenLine, sActions, nPriority))

    # tests
    print(" list tests...")
    sGCTests = "\n".join(lTest)
    sGCTestsJS = '{ "aData2": ' + json.dumps(lTest, ensure_ascii=False) + " }\n"

    # processing rules
    print(" preparing rules...")
    lPreparedRule = []
    for iLine, sRuleGroup, sTokenLine, sRuleActions, nRulePriority in lRuleLine:
        for lRule in createRule(iLine, sRuleGroup, sTokenLine, sRuleActions, nRulePriority):
            lPreparedRule.append(lRule)

    # Graph creation
    for e in lPreparedRule:
        print(e)

    oDARG = darg.DARG(lPreparedRule, sLang)
    oRuleGraph = oDARG.createGraph()

    # Result
    d = {
        "graph_callables": None,
        "graph_gctests": None,
        "rules_graph": oRuleGraph,
        "rules_actions": dACTIONS
    }
    return d















































































































































































#!python3

# RULE GRAPH BUILDER
#
# by Olivier R.
# License: MPL 2

import json
import sys
import time
import traceback

from graphspell.progressbar import ProgressBar


class DARG:
    """DIRECT ACYCLIC RULE GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)

    def __init__ (self, lRule, sLangCode):
        "build the graph from <lRule>, a list of rules (each rule is a list of tokens); <lRule> is sorted in place"
        print("===== Direct Acyclic Rule Graph - Minimal Acyclic Finite State Automaton =====")

        # Preparing DARG
        print(" > Preparing list of tokens")
        self.sLangCode = sLangCode
        self.nRule = len(lRule)
        self.aPreviousRule = []
        Node.resetNextId()
        self.oRoot = Node()

        self.lUncheckedNodes = []   # list of nodes that have not been checked for duplication.
        self.lMinimizedNodes = {}   # list of unique nodes that have been checked for duplication.
        self.nNode = 0
        self.nArc = 0

        # build
        lRule.sort()
        oProgBar = ProgressBar(0, len(lRule))
        for aRule in lRule:
            self.insert(aRule)
            oProgBar.increment(1)
        oProgBar.done()
        self.finish()
        self.countNodes()
        self.countArcs()
        self.displayInfo()

    # BUILD DARG
    def insert (self, aRule):
        "insert the rule <aRule> in the graph; rules must be inserted in sorted order (the program exits otherwise)"
        if aRule < self.aPreviousRule:
            sys.exit("# Error: tokens must be inserted in order.")

        # find common prefix between word and previous word
        nCommonPrefix = 0
        for i in range(min(len(aRule), len(self.aPreviousRule))):
            if aRule[i] != self.aPreviousRule[i]:
                break
            nCommonPrefix += 1

        # Check the lUncheckedNodes for redundant nodes, proceeding from last
        # one down to the common prefix size. Then truncate the list at that point.
        self._minimize(nCommonPrefix)

        # add the suffix, starting from the correct node mid-way through the graph
        if not self.lUncheckedNodes:
            oNode = self.oRoot
        else:
            oNode = self.lUncheckedNodes[-1][2]

        iToken = nCommonPrefix
        for sToken in aRule[nCommonPrefix:]:
            oNextNode = Node()
            oNode.dArcs[sToken] = oNextNode
            self.lUncheckedNodes.append((oNode, sToken, oNextNode))
            # the node holding the next-to-last arc of the rule is flagged as final
            if iToken == (len(aRule) - 2):
                oNode.bFinal = True
            iToken += 1
            oNode = oNextNode
        oNode.bFinal = True
        self.aPreviousRule = aRule

    def finish (self):
        "minimize unchecked nodes"
        self._minimize(0)

    def _minimize (self, downTo):
        "merge the unchecked nodes with equivalent minimized nodes, from the last one down to index <downTo>"
        # proceed from the leaf up to a certain point
        for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ):
            oNode, sToken, oChildNode = self.lUncheckedNodes[i]
            if oChildNode in self.lMinimizedNodes:
                # replace the child with the previously encountered one
                oNode.dArcs[sToken] = self.lMinimizedNodes[oChildNode]
            else:
                # add the state to the minimized nodes.
                self.lMinimizedNodes[oChildNode] = oChildNode
            self.lUncheckedNodes.pop()

    def countNodes (self):
        "set <nNode> to the number of minimized nodes"
        self.nNode = len(self.lMinimizedNodes)

    def countArcs (self):
        "set <nArc> to the number of arcs leaving the minimized nodes"
        self.nArc = 0
        for oNode in self.lMinimizedNodes:
            self.nArc += len(oNode.dArcs)

    def displayInfo (self):
        "print the number of rules, nodes and arcs"
        print(" * {:<12} {:>16,}".format("Rules:", self.nRule))
        print(" * {:<12} {:>16,}".format("Nodes:", self.nNode))
        print(" * {:<12} {:>16,}".format("Arcs:", self.nArc))

    def createGraph (self):
        "return the graph as a dictionary: node identifier -> node as a dictionary (the root is stored at key 0)"
        dGraph = { 0: self.oRoot.getNodeAsDict() }
        print(0, "\t", self.oRoot.getNodeAsDict())
        for oNode in self.lMinimizedNodes:
            # the hash of the node is used as its identifier in the graph
            sHashId = oNode.__hash__()
            if sHashId not in dGraph:
                dGraph[sHashId] = oNode.getNodeAsDict()
                print(sHashId, "\t", dGraph[sHashId])
            else:
                print("Error. Double node… same id: ", sHashId)
                print(str(oNode.getNodeAsDict()))
        return dGraph


class Node:
    "node of the rule graph: arcs are stored in <dArcs> (arc value -> next node)"
    NextId = 0

    def __init__ (self):
        self.i = Node.NextId
        Node.NextId += 1
        self.bFinal = False
        self.dArcs = {}          # key: arc value; value: a node

    @classmethod
    def resetNextId (cls):
        "restart the numbering of nodes from 0"
        cls.NextId = 0

    def __str__ (self):
        # Caution! this function is used for hashing and comparison!
        cFinal = "1"  if self.bFinal  else "0"
        l = [cFinal]
        for (key, oNode) in self.dArcs.items():
            l.append(str(key))
            l.append(str(oNode.i))
        return "_".join(l)

    def __hash__ (self):
        # Used as a key in a python dictionary.
        return self.__str__().__hash__()

    def __eq__ (self, other):
        # Used as a key in a python dictionary.
        # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
        return self.__str__() == other.__str__()

    def getNodeAsDict (self):
        "returns the node as a dictionary structure"
        dNode = {}
        dRegex = {}     # arcs beginning with "~", stored without this first character
        dRules = {}     # arcs beginning with "##", stored without the first "#"
        for arc, oNode in self.dArcs.items():
            if isinstance(arc, str) and arc.startswith("~"):
                dRegex[arc[1:]] = oNode.__hash__()
            elif arc.startswith("##"):
                dRules[arc[1:]] = oNode.__hash__()
            else:
                dNode[arc] = oNode.__hash__()
        if dRegex:
            dNode["<regex>"] = dRegex
        if dRules:
            dNode["<rules>"] = dRules
        #if self.bFinal:
        #    dNode["<final>"] = 1
        return dNode





1 2 3 4 5	+ + + + +	# generated code, do not edit dGraph = ${rules_graph} dRule = ${rules_actions}













































































































































# Sentence checker

import re

from ..graphspell.tokenizer import Tokenizer
from ..graphspell.echo import echo
from .gc_rules_graph import dGraph, dRule


oTokenizer = Tokenizer("${lang}")


class Sentence:
    "sentence to check against the rule graph (first draft: see the NOTE(review) comments)"

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffset = nOffset
        # NOTE(review): genTokens() was called without any text; presumably it expects
        # the sentence to tokenize -- confirm against graphspell.tokenizer.
        self.lToken = list(oTokenizer.genTokens(sSentence))

    def parse (self):
        "walk through the rule graph with the tokens of the sentence and execute the actions of the rules found"
        dErr = {}
        lPointer = []       # positions in the graph: {"nOffset": …, "dNode": node reached}
        for dToken in self.lToken:
            # move each pointer forward; pointers with no matching arc are dropped
            # (a new list is built: deleting while iterating would skip elements)
            lNextPointer = []
            for dPointer in lPointer:
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                    bValid = True
                if bValid:
                    lNextPointer.append(dPointer)
            lPointer = lNextPointer
            # new pointers: matching starts from the root of the graph, stored at key 0
            for dNode in self._getNextMatchingNodes(dToken, dGraph[0]):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            # execute the rules reachable from each pointer
            for dPointer in lPointer:
                if "<rules>" in dPointer["dNode"]:
                    # "<rules>" is a dictionary: line identifier -> key of the node listing the actions
                    dErr = self._executeActions(dPointer["dNode"]["<rules>"])
        return dErr

    def _getNextMatchingNodes (self, dToken, dNode):
        "generator: yield the nodes reachable from <dNode> with <dToken> (by value, by lemma, by regex on morphologies)"
        # NOTE(review): the tokens are expected to carry "sValue", "sLemma" and "lMorph" -- confirm the tokenizer provides them.
        if dToken["sValue"] in dNode:
            yield dGraph[dNode[dToken["sValue"]]]
        for sLemma in dToken["sLemma"]:
            if sLemma in dNode:
                yield dGraph[dNode[sLemma]]
        if "<regex>" in dNode:
            for sRegex in dNode["<regex>"]:
                for sMorph in dToken["lMorph"]:
                    if re.search(sRegex, sMorph):
                        yield dGraph[dNode["<regex>"][sRegex]]

    def _executeActions (self, dNode):
        "execute the actions of the rules listed in <dNode> (line identifier -> key of the node listing the actions)"
        # NOTE(review): unfinished draft. dDA, sCountry, m, nSentenceOffset, dErrs, dPriority,
        # bUppercase, bIdRule, sOption, bContext, _createError and _rewrite are not defined
        # in this module, and nothing is returned yet.
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in dRule: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, priority, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, dDA, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            nPriority = eAct[2]
                            nErrorStart = nSentenceOffset + m.start(eAct[0])
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[3], eAct[4], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
                            bChange = True
                        elif cActionType == "@":
                            # text processor
                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            globals()[sWhat](self, dDA)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            pass
                        else:
                            echo("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    # sArc is the identifier of the action which failed
                    raise Exception(str(e), "# " + sLineId + " # " + sArc) from e

    def _createWriterError (self):
        "not implemented yet: returns an empty dictionary"
        d = {}
        return d

    def _createDictError (self):
        "not implemented yet: returns an empty dictionary"
        d = {}
        return d


#### Common functions

def option ():
    "not implemented yet"
    pass


#### Analyse tokens

def morph ():
    "not implemented yet"
    pass

def morphex ():
    "not implemented yet"
    pass

def analyse ():
    "not implemented yet"
    pass

def analysex ():
    "not implemented yet"
    pass


#### Go outside scope

def nextToken ():
    "not implemented yet"
    pass

def prevToken ():
    "not implemented yet"
    pass

def look ():
    "not implemented yet"
    pass

def lookAndCheck ():
    "not implemented yet"
    pass


#### Disambiguator

def select ():
    "not implemented yet"
    pass

def exclude ():
    "not implemented yet"
    pass

def define ():
    "not implemented yet"
    pass




























































1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60	+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +	# # RÈGLES DE GRAMMAIRE FRANÇAISE POUR GRAMMALECTE # par Olivier R. # # Copyright © 2011-2017. # # This file is part of Grammalecte. # # Grammalecte is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Grammalecte is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Grammalecte. If not, see <http://www.gnu.org/licenses/> # # RÈGLES POUR LE GRAPHE DE TOKENS # DOCUMENTATION # Expressions régulières en Python : http://docs.python.org/library/re.html # [++] : séparateur des règles pour le paragraphe et des règles pour la phrase. # Types d’action: # ->> erreur # ~>> préprocesseur de texte # =>> désambiguïsateur # Fin d’interprétation du fichier avec une ligne commençant par #END # ERREURS COURANTES # http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes __rule1__ les ~:N:.:s des ~:N:.:s ces ~:N:.:s <<- -1>> acquit # Message0\|http://test.grammalecte.net __rule2__ ci important que soi ci vraiment il y a ci pour ça <<- morph(\2, ":[WAR]", False) -1>> si # Message1\|http://test.grammalecte.net __rule3__ contre nature contre pétrie contre action <<- morph(\1, "xxxx") -1:2>> =$area.replace(" ", "") # Message2\|http://test.grammalecte.org <<- ~>> =$area.replace(" ", "")

︙
15 16 17 18 19 20 21 22 23 24 25 26 27 28	15 16 17 18 19 20 21 22 23 24 25 26 27 28 29	+	import json import platform from distutils import dir_util, file_util import dialog_bundled import compile_rules import compile_rules_graph import helpers import lex_build sWarningMessage = "The content of this folder is generated by code and replaced at each build.\n"
︙
189 190 191 192 193 194 195 ~~196 197~~ 198 199 200 201 202 203 204	190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208	- - + + + + +	spLang = "gc_lang/" + sLang dVars = xConfig._sections['args'] dVars['locales'] = dVars["locales"].replace("_", "-") dVars['loc'] = str(dict([ [s, [s[0:2], s[3:5], ""]] for s in dVars["locales"].split(" ") ])) ## COMPILE RULES ~~dResult = compile_rules.make(spLang, dVars['lang'], bJavaScript) dVars.update(dResult)~~ dResultRegex = compile_rules.make(spLang, dVars['lang'], bJavaScript) dVars.update(dResultRegex) dResultGraph = compile_rules_graph.make(spLang, dVars['lang'], bJavaScript) dVars.update(dResultGraph) ## READ GRAMMAR CHECKER PLUGINS print("PYTHON:") print("+ Plugins: ", end="") sCodePlugins = "" for sf in os.listdir(spLang+"/modules"): if re.match(r"gce_\w+[.]py$", sf):
︙

Grammalecte Changes On Branch 8d8f667d6938d550

Changes In Branch rg Through [8d8f667d69] Excluding Merge-Ins