240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
"mh_frequent_words": True,
"ma_word": True,
"ma_1letter_lowercase": False,
"ma_1letter_uppercase": False
}
class TextFormatter:
    """Text Formatter: purge typographic mistakes from text.

    Creating an instance compiles, in place, the pattern strings held in the
    module-level replacement table <dReplTable>.
    """

    def __init__ (self):
        # Each table entry is a list of (pattern, replacement) pairs; the list
        # object is kept and only its content is swapped for compiled pairs.
        for lRules in dReplTable.values():
            lRules[:] = [ (re.compile(tRule[0]), tRule[1])  for tRule in lRules ]

    def formatText (self, sText):
        """Return <sText> after applying the rules of every enabled default option."""
        for sOption, bEnabled in dDefaultOptions.items():
            if not bEnabled:
                continue
            # Rules are applied in table order, each one on the result of the previous.
            for zPattern, sReplacement in dReplTable[sOption]:
                sText = zPattern.sub(sReplacement, sText)
        return sText

    def getDefaultOptions (self):
        """Return a shallow copy of the default options dictionary."""
        return dDefaultOptions.copy()
|
|
<
|
>
|
|
|
>
>
|
|
>
>
>
>
>
|
|
|
|
|
>
|
|
|
|
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
"mh_frequent_words": True,
"ma_word": True,
"ma_1letter_lowercase": False,
"ma_1letter_uppercase": False
}
# Set to True by _compileRegex() once the patterns of dReplTable are compiled.
_bCompiled = False


def _compileRegex ():
    """Compile, in place, every pattern of <dReplTable> and raise the <_bCompiled> flag."""
    global _bCompiled
    # Each table entry is a list of (pattern, replacement) pairs; the list
    # object is kept and only its content is swapped for compiled pairs.
    for lRules in dReplTable.values():
        lRules[:] = [ (re.compile(tRule[0]), tRule[1])  for tRule in lRules ]
    _bCompiled = True
def formatText (sText, dOpt=None):
    """Return <sText> after applying the rules of every enabled option.

    The options start as a copy of the defaults; when <dOpt> is given and
    non-empty, its entries are merged on top of that copy.
    """
    dActive = getDefaultOptions()
    if dOpt:
        dActive.update(dOpt)
    # The replacement table is compiled on first use only.
    if not _bCompiled:
        _compileRegex()
    for sOption, bEnabled in dActive.items():
        if not bEnabled:
            continue
        # Rules are applied in table order, each one on the result of the previous.
        for zPattern, sReplacement in dReplTable[sOption]:
            sText = zPattern.sub(sReplacement, sText)
    return sText
def getDefaultOptions ():
    """Return a shallow copy of the default options, so callers can edit it freely."""
    return dict(dDefaultOptions)
|