140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
- aLabels: list of labels (human readable meaning of tags)
for WORD tokens:
- bValidToken: True if the token is valid for the spellchecker
- lSubTokens: list of sub-tokens (prefix, stem, suffix), one for each part of the split token
"""
if not self.lexicographer:
return
if "lMorph" not in dToken:
dToken["lMorph"] = self.getMorph(dToken["sValue"])
if dToken["sType"].startswith("WORD"):
dToken["bValidToken"] = True if "lMorph" in dToken else self.isValidToken(dToken["sValue"])
sPrefix, sStem, sSuffix = self.lexicographer.split(dToken["sValue"])
if sStem != dToken["sValue"]:
dToken["lSubTokens"] = [
{ "sType": "WORD", "sValue": sPrefix, "lMorph": self.getMorph(sPrefix) },
{ "sType": "WORD", "sValue": sStem, "lMorph": self.getMorph(sStem) },
{ "sType": "WORD", "sValue": sSuffix, "lMorph": self.getMorph(sSuffix) }
]
|
>
>
<
|
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
- aLabels: list of labels (human readable meaning of tags)
for WORD tokens:
- bValidToken: True if the token is valid for the spellchecker
- lSubTokens: list of sub-tokens (prefix, stem, suffix), one for each part of the split token
"""
if not self.lexicographer:
return
if dToken["sType"].startswith("WORD"):
dToken["bValidToken"] = True if "lMorph" in dToken else self.isValidToken(dToken["sValue"])
if "lMorph" not in dToken:
dToken["lMorph"] = self.getMorph(dToken["sValue"])
if dToken["sType"].startswith("WORD"):
sPrefix, sStem, sSuffix = self.lexicographer.split(dToken["sValue"])
if sStem != dToken["sValue"]:
dToken["lSubTokens"] = [
{ "sType": "WORD", "sValue": sPrefix, "lMorph": self.getMorph(sPrefix) },
{ "sType": "WORD", "sValue": sStem, "lMorph": self.getMorph(sStem) },
{ "sType": "WORD", "sValue": sSuffix, "lMorph": self.getMorph(sSuffix) }
]
|