240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
-
+
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
|
"mh_frequent_words": True,
"ma_word": True,
"ma_1letter_lowercase": False,
"ma_1letter_uppercase": False
}
class TextFormatter:
_bCompiled = False
"Text Formatter: purge typographic mistakes from text"
def __init__ (self):
for _, lTup in dReplTable.items():
for i, t in enumerate(lTup):
lTup[i] = (re.compile(t[0]), t[1])
def _compileRegex():
global _bCompiled
for _, lTup in dReplTable.items():
for i, t in enumerate(lTup):
lTup[i] = (re.compile(t[0]), t[1])
_bCompiled = True
def formatText (self, sText):
"returns formatted text"
for sOptName, bVal in dDefaultOptions.items():
if bVal:
for zRgx, sRep in dReplTable[sOptName]:
sText = zRgx.sub(sRep, sText)
return sText
def formatText (sText, dOpt=None):
"returns formatted text"
if not _bCompiled:
_compileRegex()
dOptions = getDefaultOptions()
if dOpt:
dOptions.update(dOpt)
for sOptName, bVal in dOptions.items():
if bVal:
for zRgx, sRep in dReplTable[sOptName]:
sText = zRgx.sub(sRep, sText)
return sText
def getDefaultOptions (self):
"returns default options"
return dDefaultOptions.copy()
def getDefaultOptions ():
"returns default options"
return dDefaultOptions.copy()
|