Grammalecte  Diff

Differences From Artifact [3f7c9cfb70]:

To Artifact [d7ae02087f]:


340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355

    def sortLexiconByIdx (self):
        """Replace self.lFlexions with a new list ordered by the Flexion.keyIdx key.

        A progress message is echoed first. The previous list object is left
        untouched; the attribute is rebound to a freshly built, sorted copy.
        """
        echo(' * Dictionnaire - tri du lexique (par index)...')
        lSorted = list(self.lFlexions)
        lSorted.sort(key=Flexion.keyIdx)
        self.lFlexions = lSorted

    def checkEntries (self):
        """Echo a progress message, then call check() on each item of self.lEntry in order."""
        echo(' * Dictionnaire - contrôle des entrées...')
        for oEntry in self.lEntry:
            oEntry.check()

    def generateFlexions (self):
        echo(' * Lexique - genèse des formes fléchies...')
        for oEntry in self.lEntry:
            oEntry.generateFlexions(self.dFlags)
            self.lFlexions.extend(oEntry.lFlexions)
        # Count flexions in multiple entries







|
|







340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355

    def sortLexiconByIdx (self):
        """Replace self.lFlexions with a new list ordered by the Flexion.keyIdx key.

        A progress message is echoed first. The previous list object is left
        untouched; the attribute is rebound to a freshly built, sorted copy.
        """
        echo(' * Dictionnaire - tri du lexique (par index)...')
        lSorted = list(self.lFlexions)
        lSorted.sort(key=Flexion.keyIdx)
        self.lFlexions = lSorted

    def checkEntries (self):
        """Echo a progress message, then call check() on each item of self.lEntry in order."""
        echo(' * Dictionnaire - contrôle des entrées...')
        for oEntry in self.lEntry:
            oEntry.check()

    def generateFlexions (self):
        echo(' * Lexique - genèse des formes fléchies...')
        for oEntry in self.lEntry:
            oEntry.generateFlexions(self.dFlags)
            self.lFlexions.extend(oEntry.lFlexions)
        # Count flexions in multiple entries
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734














735
736
737
738
739
740
741
        self.lemma = firstElems[0]
        self.flags = firstElems[1]  if len(firstElems) > 1  else ''
        # morph
        for i in range(1, nElems):
            if len(elems[i]) > 3 and elems[i][2] == ':':
                sAttr, sContent = elems[i].split(':', 1)
                if sAttr in {"po", "is", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et", "di", "fq", "id"}:
                    # vérification
                    if sAttr in {"po", "is", "lx", "se", "et"} \
                        and ( sContent not in tags.dTags[sAttr] and not (sAttr == "po" and re.match("v[0123][ea_][ix_][tx_][nx_][pqrex_][mx_][eaz_]", sContent)) ):
                        echo("  ## Étiquette inconnue pour le tag <{}>: {} @ {}/{}".format(sAttr, sContent, self.lemma, self.flags))
                    # renommage des attributs
                    if sAttr == "is":
                        sAttr = "iz"
                    if sAttr == "id":
                        sAttr = "iD"
                    # modification
                    try:
                        if sAttr in {"po", "iz", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et"}:
                            sContent = getattr(self, sAttr) + " " + sContent
                        setattr(self, sAttr, sContent.strip())
                    except:
                        echo('  ## Erreur. Attribut non attribuable: {}  @  {}/{}'.format(sAttr, self.lemma, self.flags))
                else:
                    echo('  ## Champ inconnu: {} @ {}/{}'.format(sAttr, self.lemma, self.flags))
            else:
                self.err = self.err + elems[i]
        if self.err:
            echo("\n## Erreur dans le dictionnaire : {}".format(self.err))
            echo("   @ : " + self.lemma)

    def __str__ (self):
        """Return the text "<lemma>/<flags> <morph>", where <morph> is the result of self.getMorph(2)."""
        sMorph = self.getMorph(2)
        return "{0.lemma}/{0.flags} {1}".format(self, sMorph)

    def check (self):
        sErr = ''
        # lemme
        if self.lemma == '':
            sErr += ' > lemme vide'
        if re.match(r"^\s", self.lemma):
            sErr += ' > espace en début de lemme <' + self.lemma + '>'
        if re.search(r"\s$", self.lemma):
            sErr += ' > espace en fin de lemme <' + self.lemma + '>'














        # verbe
        if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
            sErr += ' > verbe mal étiqueté: ' + self.po
        if re.match(r"[abcdf]0", self.flags):
            if not re.search(r"p[+.]", self.flags):
                sErr += ' > verbe sans participe passé: ' + self.po
            if "()" not in self.flags:







<
<
<
<











|

|



|














>
>
>
>
>
>
>
>
>
>
>
>
>
>







692
693
694
695
696
697
698




699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
        self.lemma = firstElems[0]
        self.flags = firstElems[1]  if len(firstElems) > 1  else ''
        # morph
        for i in range(1, nElems):
            if len(elems[i]) > 3 and elems[i][2] == ':':
                sAttr, sContent = elems[i].split(':', 1)
                if sAttr in {"po", "is", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et", "di", "fq", "id"}:




                    # renommage des attributs
                    if sAttr == "is":
                        sAttr = "iz"
                    if sAttr == "id":
                        sAttr = "iD"
                    # modification
                    try:
                        if sAttr in {"po", "iz", "ds", "ts", "ip", "dp", "tp", "sp", "pa", "st", "al", "ph", "lx", "se", "et"}:
                            sContent = getattr(self, sAttr) + " " + sContent
                        setattr(self, sAttr, sContent.strip())
                    except:
                        echo(f'  ## Erreur. Attribut non attribuable: {sAttr}  @  {self.lemma}/{self.flags}')
                else:
                    echo(f'  ## Champ inconnu: {sAttr} @ {self.lemma}/{self.flags}')
            else:
                self.err = self.err + elems[i]
        if self.err:
            echo(f"\n## Erreur dans le dictionnaire : {self.err}")
            echo("   @ : " + self.lemma)

    def __str__ (self):
        """Return the text "<lemma>/<flags> <morph>", where <morph> is the result of self.getMorph(2)."""
        sMorph = self.getMorph(2)
        return "{0.lemma}/{0.flags} {1}".format(self, sMorph)

    def check (self):
        sErr = ''
        # lemme
        if self.lemma == '':
            sErr += ' > lemme vide'
        if re.match(r"^\s", self.lemma):
            sErr += ' > espace en début de lemme <' + self.lemma + '>'
        if re.search(r"\s$", self.lemma):
            sErr += ' > espace en fin de lemme <' + self.lemma + '>'
        # détection des tags inconnus
        if self.po:
            for sTag in self.po.split():
                if sTag not in tags.dTags["po"] and not re.match("v[0123]", sTag):
                    sErr += f" > Étiquette inconnue pour l’attribut <po>: {sTag}"
        if self.iz:
            for sTag in self.iz.split():
                if sTag not in tags.dTags["is"]:
                    sErr += f" > Étiquette inconnue pour l’attribut <is>: {sTag}"
        for sAttr in {"lx", "se", "et"}:
            if getattr(self, sAttr):
                for sTag in getattr(self, sAttr).split(" "):
                    if sTag not in tags.dTags[sAttr] and not re.match("v[0123]", sTag):
                        sErr += f" > Étiquette inconnue pour l’attribut <{sAttr}>: {sTag}"
        # verbe
        if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
            sErr += ' > verbe mal étiqueté: ' + self.po
        if re.match(r"[abcdf]0", self.flags):
            if not re.search(r"p[+.]", self.flags):
                sErr += ' > verbe sans participe passé: ' + self.po
            if "()" not in self.flags:
759
760
761
762
763
764
765

766
767
768
769
770
771
772
            sErr += ' > étiquettes <is> incohérentes '
        if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFGW](?!=)", self.flags):
            sErr += ' > étiquettes <is> incohérentes '
        if self.iz.endswith(("mas", "fem", "epi")) and (not self.flags or not self.flags.startswith(("S", "X", "F", "W", "A", "I", "U"))):
            sErr += ' > étiquettes <is> incomplètes'
        if re.match(r"[SXAIFGW](?!=)", self.flags) and "()" not in self.flags:
            sErr += ' > drapeau () manquant'

        if sErr:
            echo(f"   erreur {sErr}   sur   " + self.__str__())

    def setTagsFrom (self, oEnt):
        self.po = oEnt.po
        self.iz = oEnt.iz
        self.ds = oEnt.ds







>







769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
            sErr += ' > étiquettes <is> incohérentes '
        if re.search(r"pl|sg|inv", self.iz) and re.match(r"[SXAIFGW](?!=)", self.flags):
            sErr += ' > étiquettes <is> incohérentes '
        if self.iz.endswith(("mas", "fem", "epi")) and (not self.flags or not self.flags.startswith(("S", "X", "F", "W", "A", "I", "U"))):
            sErr += ' > étiquettes <is> incomplètes'
        if re.match(r"[SXAIFGW](?!=)", self.flags) and "()" not in self.flags:
            sErr += ' > drapeau () manquant'
        # print
        if sErr:
            echo(f"   erreur {sErr}   sur   " + self.__str__())

    def setTagsFrom (self, oEnt):
        self.po = oEnt.po
        self.iz = oEnt.iz
        self.ds = oEnt.ds