Grammalecte: Check-in [a835df567d]

Overview

Comment:	[build] rename global vars
Downloads:	Tarball | ZIP archive | SQL archive
Timelines:	family | ancestors | descendants | both | trunk | build
Files:	files | file ages | folders
SHA3-256:	a835df567d36bd6d8260ae36db48b7e032aa0efa7128ba797df7fdb6bb0878b1
User & Date:	olr on 2017-05-16 23:46:18
Other Links:	manifest | tags

Context

2017-05-16
23:52		[build] count unnamed rules check-in: e6a7cd50a7 user: olr tags: trunk, build
23:46		[build] rename global vars check-in: a835df567d user: olr tags: trunk, build
22:30		[fr] tabulations check-in: b650eee6b7 user: olr tags: trunk, fr

Changes

Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Modified compile_rules.py from [e2df43feb0] to [e1c06fc5a9].

@@ -1,25 +1,25 @@
 import re
 import sys
 import traceback
 import copy
 import json
 from distutils import file_util



-DEF = {}
-FUNCTIONS = []
+dDEF = {}
+lFUNCTIONS = []

-RULESET = set() # set of rule-ids to check if there is several rules with the same id
+aRULESET = set() # set of rule-ids to check if there is several rules with the same id

-JSREGEXES = {}
+dJSREGEXES = {}

-WORDLIMITLEFT = r"(?<![\w.,–-])" # r"(?<![-.,—])\b" seems slower
-WORDLIMITRIGHT = r"(?![\w–-])" # r"\b(?!-—)" seems slower
+sWORDLIMITLEFT = r"(?<![\w.,–-])" # r"(?<![-.,—])\b" seems slower
+sWORDLIMITRIGHT = r"(?![\w–-])" # r"\b(?!-—)" seems slower


 def prepareFunction (s):
     s = s.replace("__also__", "bCondMemo")
     s = s.replace("__else__", "not bCondMemo")
     s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(dDA, m.start(\\2), m.group(\\2)', s)
     s = re.sub(r"define[(][\\](\d+)", 'define(dDA, m.start(\\1)', s)
︙
160 161 162 163 164 165 166 ~~167~~ 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 ~~186~~ 187 188 ~~189~~ 190 191 192 193 194 195 196	160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196	- + - + - +	traceback.print_exc() print(sRegex) return 0 def createRule (s, nIdLine, sLang, bParagraph, dOptPriority): "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]" ~~global JSREGEXES~~ global dJSREGEXES #### OPTIONS sLineId = str(nIdLine) + ("p" if bParagraph else "s") sRuleId = sLineId sOption = False # False or [a-z0-9]+ name nPriority = 4 # Default is 4, value must be between 0 and 9 tGroups = None # code for groups positioning (only useful for JavaScript) cCaseMode = 'i' # i: case insensitive, s: case sensitive, u: uppercasing allowed cWordLimitLeft = '[' # [: word limit, <: no specific limit cWordLimitRight = ']' # ]: word limit, >: no specific limit m = re.match("^__(?P<borders_and_case>[[<]\\w[]>])(?P<option>/[a-zA-Z0-9]+\|)(?P<ruleid>\\(\\w+\\)\|)(?P<priority>![0-9]\|)__ *", s) if m: cWordLimitLeft = m.group('borders_and_case')[0] cCaseMode = m.group('borders_and_case')[1] cWordLimitRight = m.group('borders_and_case')[2] sOption = m.group('option')[1:] if m.group('option') else False if m.group('ruleid'): sRuleId = m.group('ruleid')[1:-1] ~~if sRuleId in RULESET:~~ if sRuleId in aRULESET: print("# Error. Several rules have the same id: " + sRuleId) exit() ~~RULESET.add(sRuleId)~~ aRULESET.add(sRuleId) nPriority = dOptPriority.get(sOption, 4) if m.group('priority'): nPriority = int(m.group('priority')[1:]) s = s[m.end(0):] else: print("# Warning. No option defined at line: " + sLineId)
︙
206 207 208 209 210 211 212 ~~213~~ 214 215 216 217 218 219 220 221 222 223 ~~224~~ 225 226 227 228 229 230 231 232 233 234 235 236 237 ~~238~~ 239 ~~240~~ 241 242 243 244 245 246 247	206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247	- + - + - + - +	m = re.search("@@\\S+", sRegex) if m: tGroups = groupsPositioningCodeToList(sRegex[m.start()+2:]) sRegex = sRegex[:m.start()].strip() # JS regex m = re.search("<js>.+</js>i?", sRegex) if m: ~~JSREGEXES[sLineId] = m.group(0)~~ dJSREGEXES[sLineId] = m.group(0) sRegex = sRegex[:m.start()].strip() if "<js>" in sRegex or "</js>" in sRegex: print("# Error: JavaScript regex not delimited at line " + sLineId) return None # quotes ? if sRegex.startswith('"') and sRegex.endswith('"'): sRegex = sRegex[1:-1] ## definitions ~~for sDef, sRepl in DEF.items():~~ for sDef, sRepl in dDEF.items(): sRegex = sRegex.replace(sDef, sRepl) ## count number of groups (must be done before modifying the regex) nGroup = countGroupInRegex(sRegex) if nGroup > 0: if not tGroups: print("# Warning: groups positioning code for JavaScript should be defined at line " + sLineId) else: if nGroup != len(tGroups): print("# Error: groups positioning code irrelevant at line " + sLineId) ## word limit if cWordLimitLeft == '[' and not sRegex.startswith(("^", '’', "'", ",")): ~~sRegex = WORDLIMITLEFT + sRegex~~ sRegex = sWORDLIMITLEFT + sRegex if cWordLimitRight == ']' and not sRegex.endswith(("$", '’', "'", ",")): ~~sRegex = sRegex + WORDLIMITRIGHT~~ sRegex = sRegex + sWORDLIMITRIGHT ## casing mode if cCaseMode == "i": bCaseInsensitive = True if not sRegex.startswith("(?i)"): sRegex = "(?i)" + sRegex elif cCaseMode == "s":
︙
278 279 280 281 282 283 284 ~~285~~ 286 287 288 289 290 291 292 293 294 295 ~~296~~ 297 298 299 300 301 302 303	278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303	- + - +	return None return [sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, tGroups] def createAction (sIdAction, sAction, nGroup): "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])" ~~global FUNCTIONS~~ global lFUNCTIONS m = re.search(r"([-~=>])(\d*\|)>>", sAction) if not m: print("# No action at line " + sIdAction) return None #### CONDITION sCondition = sAction[:m.start()].strip() if sCondition: sCondition = prepareFunction(sCondition) ~~FUNCTIONS.append(("c_"+sIdAction, sCondition))~~ lFUNCTIONS.append(("c_"+sIdAction, sCondition)) for x in re.finditer("[.](?:group\|start\|end)[(](\d+)[)]", sCondition): if int(x.group(1)) > nGroup: print("# Error in groups in condition at line " + sIdAction + " ("+str(nGroup)+" groups only)") if ".match" in sCondition: print("# Error. JS compatibility. Don't use .match() in condition, use .search()") sCondition = "c_"+sIdAction else:
︙
319 320 321 322 323 324 325 ~~326~~ 327 328 329 330 331 332 333	319 320 321 322 323 324 325 326 327 328 329 330 331 332 333	- +	sURL = "" mURL = re.search("[\|] (https?://.)", sMsg) if mURL: sURL = mURL.group(1).strip() sMsg = sMsg[:mURL.start(0)].strip() if sMsg[0:1] == "=": sMsg = prepareFunction(sMsg[1:]) ~~FUNCTIONS.append(("m_"+sIdAction, sMsg))~~ lFUNCTIONS.append(("m_"+sIdAction, sMsg)) for x in re.finditer("group[(](\d+)[)]", sMsg): if int(x.group(1)) > nGroup: print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)") sMsg = "=m_"+sIdAction else: for x in re.finditer(r"\\(\d+)", sMsg): if int(x.group(1)) > nGroup:
︙
351 352 353 354 355 356 357 ~~358~~ 359 360 361 362 363 364 365 366 367 368 369 ~~370~~ 371 372 373 374 375 376 377 378 379 380 ~~381~~ 382 383 384 385 386 387 388	351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388	- + - + - +	print("# Error in action at line " + sIdAction + ": This action looks like code. Line should begin with =") if cAction == "-": ## error detected --> suggestion if not sAction: print("# Error in action at line " + sIdAction + ": This action is empty.") if sAction[0:1] == "=": ~~FUNCTIONS.append(("s_"+sIdAction, sAction[1:]))~~ lFUNCTIONS.append(("s_"+sIdAction, sAction[1:])) sAction = "=s_"+sIdAction elif sAction.startswith('"') and sAction.endswith('"'): sAction = sAction[1:-1] if not sMsg: print("# Error in action at line " + sIdAction + ": the message is empty.") return [sCondition, cAction, sAction, iGroup, sMsg, sURL] elif cAction == "~": ## text processor if not sAction: print("# Error in action at line " + sIdAction + ": This action is empty.") if sAction[0:1] == "=": ~~FUNCTIONS.append(("p_"+sIdAction, sAction[1:]))~~ lFUNCTIONS.append(("p_"+sIdAction, sAction[1:])) sAction = "=p_"+sIdAction elif sAction.startswith('"') and sAction.endswith('"'): sAction = sAction[1:-1] return [sCondition, cAction, sAction, iGroup] elif cAction == "=": ## disambiguator if sAction[0:1] == "=": sAction = sAction[1:] if not sAction: print("# Error in action at line " + sIdAction + ": This action is empty.") ~~FUNCTIONS.append(("d_"+sIdAction, sAction))~~ lFUNCTIONS.append(("d_"+sIdAction, sAction)) sAction = "d_"+sIdAction return [sCondition, cAction, sAction] elif cAction == ">": ## no action, break loop if condition is False return [sCondition, cAction, ""] else: print("# Unknown action at line " + sIdAction)
︙
402 403 404 405 406 407 408 ~~409 410~~ 411 412 413 414 415 416 417	402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417	- - + +	# Ā-ʯ 0100-02AF (mixed) # -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ bCaseInsensitive = False if "(?i)" in sRegex: sRegex = sRegex.replace("(?i)", "") bCaseInsensitive = True lNegLookBeforeRegex = [] ~~if WORDLIMITLEFT in sRegex: sRegex = sRegex.replace(WORDLIMITLEFT, "")~~ if sWORDLIMITLEFT in sRegex: sRegex = sRegex.replace(sWORDLIMITLEFT, "") lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"] sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ") sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]") sRegex = sRegex.replace("[.]", r"\.") if not sRegex.startswith("<js>"): sRegex = sRegex.replace("/", r"\/") m = re.search(r"\(\?<!([^()]+)\)", sRegex) # Negative lookbefore assertion should always be at the beginning of regex
︙
436 437 438 439 440 441 442 ~~443~~ 444 445 446 447 448 449 450	436 437 438 439 440 441 442 443 444 445 446 447 448 449 450	- +	lRuleJS = copy.deepcopy(lRule) del lRule[-1] # tGroups positioning codes are useless for Python # error messages for aAction in lRuleJS[6]: if aAction[1] == "-": aAction[4] = aAction[4].replace("« ", "« ").replace(" »", " »") # js regexes ~~lRuleJS[1], lNegLookBehindRegex = regex2js( JSREGEXES.get(lRuleJS[3], lRuleJS[1]) )~~ lRuleJS[1], lNegLookBehindRegex = regex2js( dJSREGEXES.get(lRuleJS[3], lRuleJS[1]) ) lRuleJS.append(lNegLookBehindRegex) return lRuleJS def writeRulesToJSArray (lRules): sArray = "[\n" for sOption, aRuleGroup in lRules:
︙
542 543 544 545 546 547 548 ~~549~~ 550 551 552 553 554 555 556 557 558 559 560 561 ~~562~~ 563 564 565 566 567 568 569	542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569	- + - +	def make (lRules, sLang, bJavaScript): "compile rules, returns a dictionary of values" # for clarity purpose, don’t create any file here # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines print(" parsing rules...") ~~global DEF~~ global dDEF lLine = [] lRuleLine = [] lTest = [] lOpt = [] for i, sLine in enumerate(lRules, 1): if sLine.startswith('#END'): break elif sLine.startswith("#"): pass elif sLine.startswith("DEF:"): m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip()) if m: ~~DEF["{"+m.group(1)+"}"] = m.group(2)~~ dDEF["{"+m.group(1)+"}"] = m.group(2) else: print("Error in definition: ", end="") print(sLine.strip()) elif sLine.startswith("TEST:"): lTest.append("{:<8}".format(i) + " " + sLine[5:].strip()) elif sLine.startswith("TODO:"): pass
︙
611 612 613 614 615 616 617 ~~618~~ 619 620 621 622 623 624 625	611 612 613 614 615 616 617 618 619 620 621 622 623 624 625	- +	lSentenceRules.append(aRule) lSentenceRulesJS.append(pyRuleToJS(aRule)) # creating file with all functions callable by rules print(" creating callables...") sPyCallables = "# generated code, do not edit\n" sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n" ~~for sFuncName, sReturn in FUNCTIONS:~~ for sFuncName, sReturn in lFUNCTIONS: cType = sFuncName[0:1] if cType == "c": # condition sParams = "s, sx, m, dDA, sCountry, bCondMemo" elif cType == "m": # message sParams = "s, m" elif cType == "s": # suggestion sParams = "s, m"
︙

Grammalecte Check-in [a835df567d]