294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
|
return lToken[-1]
return lToken[i]
#### Disambiguator for regex rules
def select (dTokenPos, nPos, sWord, sPattern):
    """Disambiguation: select morphologies of <sWord> matching <sPattern>.

    The morphologies of <sWord> are fetched with _oSpellChecker.getMorph();
    those matching the regex <sPattern> are stored in
    dTokenPos[nPos]["lMorph"].

    Nothing is stored when <sWord> is empty, when there is no token at <nPos>,
    when <sWord> has at most one morphology, or when the filter keeps none or
    all of the morphologies.

    Always returns True.
    """
    if not sWord:
        return True
    if nPos not in dTokenPos:
        echo("Error. There should be a token at this position: ", nPos)
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        # zero or one morphology: nothing to disambiguate
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
    # only record the result when it actually narrows the list without emptying it
    if lSelect and len(lSelect) != len(lMorph):
        dTokenPos[nPos]["lMorph"] = lSelect
    return True
def exclude (dTokenPos, nPos, sWord, sPattern):
    """Disambiguation: exclude morphologies of <sWord> matching <sPattern>.

    The morphologies of <sWord> are fetched with _oSpellChecker.getMorph();
    those NOT matching the regex <sPattern> are stored in
    dTokenPos[nPos]["lMorph"].

    Nothing is stored when <sWord> is empty, when there is no token at <nPos>,
    when <sWord> has at most one morphology, or when the filter keeps none or
    all of the morphologies.

    Always returns True.
    """
    if not sWord:
        return True
    if nPos not in dTokenPos:
        echo("Error. There should be a token at this position: ", nPos)
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        # zero or one morphology: nothing to disambiguate
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
    # only record the result when it actually narrows the list without emptying it
    if lSelect and len(lSelect) != len(lMorph):
        dTokenPos[nPos]["lMorph"] = lSelect
    return True
def define (dTokenPos, nPos, sMorphs):
    """Disambiguation: set morphologies of token at <nPos> with <sMorphs>.

    <sMorphs> is a string of morphologies separated by "|"; it is split and
    the resulting list replaces dTokenPos[nPos]["lMorph"].
    If there is no token at <nPos>, an error is reported through echo() and
    nothing is modified.

    Always returns True.
    """
    if nPos not in dTokenPos:
        echo("Error. There should be a token at this position: ", nPos)
        return True
    dTokenPos[nPos]["lMorph"] = sMorphs.split("|")
    return True
#### Disambiguation for graph rules
def g_select (dToken, sPattern):
    """Disambiguation: select morphologies for <dToken> according to <sPattern>.

    The candidate morphologies are dToken["lMorph"] when that key exists,
    otherwise those returned by _oSpellChecker.getMorph(dToken["sValue"]).
    The morphologies matching the regex <sPattern> are stored in
    dToken["lMorph"], unless there is at most one candidate or the filter
    keeps none or all of them.

    Always returns True.
    """
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        # zero or one morphology: nothing to disambiguate
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if re.search(sPattern, sMorph) ]
    # only record the result when it actually narrows the list without emptying it
    if lSelect and len(lSelect) != len(lMorph):
        dToken["lMorph"] = lSelect
    return True
def g_exclude (dToken, sPattern):
    """Disambiguation: exclude morphologies for <dToken> according to <sPattern>.

    The candidate morphologies are dToken["lMorph"] when that key exists,
    otherwise those returned by _oSpellChecker.getMorph(dToken["sValue"]).
    The morphologies NOT matching the regex <sPattern> are stored in
    dToken["lMorph"], unless there is at most one candidate or the filter
    keeps none or all of them.

    Always returns True.
    """
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        # zero or one morphology: nothing to disambiguate
        return True
    lSelect = [ sMorph  for sMorph in lMorph  if not re.search(sPattern, sMorph) ]
    # only record the result when it actually narrows the list without emptying it
    if lSelect and len(lSelect) != len(lMorph):
        dToken["lMorph"] = lSelect
    return True
def g_addmorph (dToken, sNewMorph):
|
|
>
>
|
<
<
<
|
|
|
<
|
|
<
|
<
<
<
<
|
|
>
>
|
>
|
<
<
|
|
|
|
<
<
<
<
|
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
|
return lToken[-1]
return lToken[i]
#### Disambiguator for regex rules
def select (dTokenPos, nPos, sWord, sPattern, sNegPattern=""):
    """Disambiguation: select morphologies of <sWord> matching <sPattern>.

    The morphologies of <sWord> are fetched with _oSpellChecker.getMorph().
    A morphology is kept when it matches the regex <sPattern> (if given) and
    does not match the regex <sNegPattern> (if given). The kept morphologies
    are stored in dTokenPos[nPos]["lMorph"].

    Nothing is stored when <sWord> is empty, when there is no token at <nPos>,
    when <sWord> has at most one morphology, when both patterns are empty
    (an error is reported through echo()), or when the filter keeps none or
    all of the morphologies.

    Always returns True.
    """
    if not sWord:
        return True
    if nPos not in dTokenPos:
        echo("Error. There should be a token at this position: ", nPos)
        return True
    lMorph = _oSpellChecker.getMorph(sWord)
    if not lMorph or len(lMorph) == 1:
        # zero or one morphology: nothing to disambiguate
        return True
    if not sPattern and not sNegPattern:
        echo("# Error: missing pattern for disambiguation selection...")
        return True
    # an empty pattern means "no constraint" on that side of the filter
    lSelect = [
        sMorph
        for sMorph in lMorph
        if (not sPattern or re.search(sPattern, sMorph))
        and not (sNegPattern and re.search(sNegPattern, sMorph))
    ]
    # only record the result when it actually narrows the list without emptying it
    if lSelect and len(lSelect) != len(lMorph):
        dTokenPos[nPos]["lMorph"] = lSelect
    return True
def define (dTokenPos, nPos, sMorphs):
    """Disambiguation: set morphologies of token at <nPos> with <sMorphs>.

    <sMorphs> is a string of morphologies separated by "|"; it is split and
    the resulting list replaces dTokenPos[nPos]["lMorph"].
    If there is no token at <nPos>, an error is reported through echo() and
    nothing is modified.

    Always returns True.
    """
    if nPos not in dTokenPos:
        echo("Error. There should be a token at this position: ", nPos)
        return True
    dTokenPos[nPos]["lMorph"] = sMorphs.split("|")
    return True
#### Disambiguation for graph rules
def g_select (dToken, sPattern, sNegPattern=""):
    """Disambiguation: select morphologies for <dToken> according to <sPattern>,
    removing those matching <sNegPattern>.

    The candidate morphologies are dToken["lMorph"] when that key exists,
    otherwise those returned by _oSpellChecker.getMorph(dToken["sValue"]).
    A morphology is kept when it matches the regex <sPattern> (if given) and
    does not match the regex <sNegPattern> (if given). The kept morphologies
    are stored in dToken["lMorph"], unless there is at most one candidate,
    both patterns are empty (an error is reported through echo()), or the
    filter keeps none or all of the candidates.

    Always returns True.
    """
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph or len(lMorph) == 1:
        # zero or one morphology: nothing to disambiguate
        return True
    if not sPattern and not sNegPattern:
        echo("# Error: missing pattern for disambiguation selection...")
        return True
    # an empty pattern means "no constraint" on that side of the filter
    lSelect = [
        sMorph
        for sMorph in lMorph
        if (not sPattern or re.search(sPattern, sMorph))
        and not (sNegPattern and re.search(sNegPattern, sMorph))
    ]
    # only record the result when it actually narrows the list without emptying it
    if lSelect and len(lSelect) != len(lMorph):
        dToken["lMorph"] = lSelect
    return True
def g_addmorph (dToken, sNewMorph):
|