Grammalecte  Diff

Differences From Artifact [8edab9530f]:

To Artifact [daf4f76e4f]:


37
38
39
40
41
42
43
44

45
46
47
48
49
50
51
37
38
39
40
41
42
43

44
45
46
47
48
49
50
51







-
+







    """DIRECT ACYCLIC WORD GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
    # We store suffix/affix codes and tags within the graph after the “real” word.
    # A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
    # Each arc is an index in self.lArcVal, where are stored characters, suffix/affix codes for stemming and tags.
    # Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.

    def __init__ (self, spfSrc, sLangCode, sLangName, sDicName, cStemming):
    def __init__ (self, spfSrc, cStemming, sLangCode, sLangName="", sDicName=""):
        print("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====")
        cStemming = cStemming.upper()
        if cStemming == "A":
            funcStemmingGen = st.defineAffixCode
        elif cStemming == "S":
            funcStemmingGen = st.defineSuffixCode
        elif cStemming == "N":
412
413
414
415
416
417
418
419

420
421
422
423
424
425
426
427
428
429
430
431
432

433
434
435
436

437
438
439
440
441
442
443
412
413
414
415
416
417
418

419
420
421
422
423
424
425
426
427
428
429
430
431

432
433
434
435

436
437
438
439
440
441
442
443







-
+












-
+



-
+







            hDst.write(json.dumps({
                            "sHeader": "/pyfsa/",
                            "sLangCode": self.sLangCode,
                            "sLangName": self.sLangName,
                            "sDicName": self.sDicName,
                            "sFileName": self.sFileName,
                            "sDate": str(datetime.datetime.now())[:-7],
                            "nEntries": self.nEntry,
                            "nEntry": self.nEntry,
                            "nChar": self.nChar,
                            "nAff": self.nAff,
                            "nTag": self.nTag,
                            "cStemming": self.cStemming,
                            "dChar": self.dChar,
                            "nNode": self.nNode,
                            "nArc": self.nArc,
                            "nArcVal": self.nArcVal,
                            "lArcVal": self.lArcVal,
                            "nCompressionMethod": nCompressionMethod,
                            "nBytesArc": self.nBytesArc,
                            "nBytesNodeAddress": self.nBytesNodeAddress,
                            "nBytesOffset": self.nBytesOffset
                            "nBytesOffset": self.nBytesOffset,
                            # JavaScript is a pile of shit, so Mozilla’s JS parser don’t like file bigger than 4 Mb!
                            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
                            # https://github.com/mozilla/addons-linter/issues/1361
                            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ],
                            "sByDic": byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in byDic ]
                        }, ensure_ascii=False))
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")

    def _writeBinary (self, sPathFile, nCompressionMethod):
        """
        Format of the binary indexable dictionary: