Changes In Branch rg Through [ac09d7cc19] Excluding Merge-Ins
This is equivalent to a diff from f2d8271145 to ac09d7cc19
2018-05-25

  20:07  [build][core] graph parser update  check-in: 7c742b5359  user: olr  tags: core, build, rg
  12:14  [build][core] tests  check-in: ac09d7cc19  user: olr  tags: core, build, rg
  10:07  [build][core] multiple tokens and groups  check-in: e83552ea6d  user: olr  tags: core, build, rg

2018-05-16

  16:22  [fr] pt: descente aux enfers/flambeaux  check-in: b5310203be  user: olr  tags: trunk, fr
  16:14  [build][core] rules graph: first draft  check-in: 061252f41e  user: olr  tags: core, build, rg
  11:58  [graphspell][bug] fix affixes occurrences calculation  check-in: f2d8271145  user: olr  tags: trunk, graphspell

2018-05-15

  12:51  [fr] test against false positive  check-in: f8bf9c3922  user: olr  tags: trunk, fr
Modified compile_rules.py from [1ea2b6d97a] to [394c512707].
︙
m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
if m:
dDEF["{"+m.group(1)+"}"] = m.group(2)
else:
print("Error in definition: ", end="")
print(sLine.strip())
elif sLine.startswith("TEST:"):
lTest.append("r{:<7}".format(i) + " " + sLine[5:].strip())
elif sLine.startswith("TODO:"):
pass
elif sLine.startswith(("OPTGROUP/", "OPTSOFTWARE:", "OPT/", "OPTLANG/", "OPTDEFAULTUILANG:", "OPTLABEL/", "OPTPRIORITY/")):
lOpt.append(sLine)
elif re.match("[ \t]*$", sLine):
pass
elif sLine.startswith("!!"):
︙
displayStats(lParagraphRules, lSentenceRules)
print("Unnamed rules: " + str(nRULEWITHOUTNAME))
d = { "callables": sPyCallables,
"callablesJS": sJSCallables,
"regex_gctests": sGCTests,
"regex_gctestsJS": sGCTestsJS,
"paragraph_rules": mergeRulesByOption(lParagraphRules),
"sentence_rules": mergeRulesByOption(lSentenceRules),
"paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
"sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
d.update(dOptions)
return d
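The regex compiler now returns its tests and rules under regex_-prefixed keys, so its output can share one substitution dictionary with the new graph compiler. A rough sketch of how a build step could merge the two result dicts, assuming compile_rules.make takes the same (spLang, sLang, bJavaScript) arguments as compile_rules_graph.make below (the actual call site is not shown in this diff):

import compile_rules
import compile_rules_graph

spLang, sLang = "gc_lang/fr", "fr"                            # illustrative values
dVars = compile_rules.make(spLang, sLang, False)              # regex_gctests, paragraph_rules, ...
dVars.update(compile_rules_graph.make(spLang, sLang, False))  # rules_graph, rules_actions, graph_callables, ...
# dVars then feeds the ${...} placeholders in gc_engine.py and gc_rules_graph.py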
Added compile_rules_graph.py version [80b17b07ae].
# Create a Direct Acyclic Rule Graph (DARG)
import re
import traceback
import json
import darg
dDEF = {}
dACTIONS = {}
lFUNCTIONS = []
def prepareFunction (s):
s = s.replace("__also__", "bCondMemo")
s = s.replace("__else__", "not bCondMemo")
s = re.sub(r"isStart *\(\)", 'before(["<START>", ","])', s)
s = re.sub(r"isRealStart *\(\)", 'before(["<START>"])', s)
s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(lToken[\\2]', s)
s = re.sub(r"define[(][\\](\d+)", 'define(lToken[\\1]', s)
s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", '\\1(lToken[\\2]', s)
s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s) # token(n)
s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s) # token(-n)
s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s) # before(s)
s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s) # after(s)
s = re.sub(r"textarea\(\s*", 'look(s, ', s) # textarea(s)
s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s) # before_chk1(s)
s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s) # after_chk1(s)
s = re.sub(r"textarea_chk1\(\s*", 'look_chk1(dDA, s, 0, ', s) # textarea_chk1(s)
s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s) # isEndOfNG(s)
s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s) # isNextNotCOD(s)
s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s) # isNextVerb(s)
s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
s = re.sub(r"[\\](\d+)", 'lToken[\\1]', s)
return s
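# example: prepareFunction('__also__ and morph(\\2, ":V") and spell(\\3)')
# returns 'bCondMemo and morph(lToken[2], ":V") and _oSpellChecker.isValid(lToken[3])'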
def changeReferenceToken (s, dPos):
for i in range(len(dPos), 0, -1):
s = s.replace("\\"+str(i), "\\"+str(dPos[i]))
return s
def genTokenRules (sTokenLine):
lToken = sTokenLine.split()
lTokenRules = None
for i, sToken in enumerate(lToken):
if sToken.startswith("{") and sToken.endswith("}") and sToken in dDEF:
lToken[i] = dDEF[sToken]
if ( (sToken.startswith("[") and sToken.endswith("]")) or (sToken.startswith("([") and sToken.endswith("])")) ):
bSelectedGroup = sToken.startswith("(") and sToken.endswith(")")
# multiple token
if not lTokenRules:
lTokenRules = [ ["("+sElem+")" if bSelectedGroup else sElem] for sElem in (sToken[2:-2] if bSelectedGroup else sToken[1:-1]).split("|") ]  # one rule per alternative
else:
lNewTemp = []
for aRule in lTokenRules:
lElem = sToken[1:-1].split("|") if not bSelectedGroup else sToken[2:-2].split("|")
sElem1 = lElem.pop(0)
if bSelectedGroup:
sElem1 = "(" + sElem1 + ")"
for sElem in lElem:
if bSelectedGroup:
sElem = "(" + sElem + ")"
aNew = list(aRule)
aNew.append(sElem)
lNewTemp.append(aNew)
aRule.append(sElem1)
lTokenRules.extend(lNewTemp)
else:
# simple token
if not lTokenRules:
lTokenRules = [[sToken]]
else:
for aRule in lTokenRules:
aRule.append(sToken)
for aRule in lTokenRules:
print("Rule\n", aRule)
yield aRule
def createRule (iLine, sRuleName, sTokenLine, sActions, nPriority):
# print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
for lToken in genTokenRules(sTokenLine):
# Calculate positions
dPos = {}
nGroup = 0
for i, sToken in enumerate(lToken):
if sToken.startswith("(") and sToken.endswith(")"):
lToken[i] = sToken[1:-1]
nGroup += 1
dPos[nGroup] = i
# Parse actions
for nAction, sAction in enumerate(sActions.split(" <<- ")):
if sAction.strip():
sActionId = sRuleName + "_a" + str(nAction)
aAction = createAction(sActionId, sAction, nGroup, nPriority, dPos)
if aAction:
dACTIONS[sActionId] = aAction
lResult = list(lToken)
lResult.extend(["##"+str(iLine), sActionId])
yield lResult
def createAction (sIdAction, sAction, nGroup, nPriority, dPos):
m = re.search("([-~=])(\\d+|)(:\\d+|)>> ", sAction)
if not m:
print(" # Error. No action found at: ", sIdAction)
print(" ==", sAction, "==")
return None
# Condition
sCondition = sAction[:m.start()].strip()
if sCondition:
sCondition = prepareFunction(sCondition)
sCondition = changeReferenceToken(sCondition, dPos)
lFUNCTIONS.append(("g_c_"+sIdAction, sCondition))
sCondition = "g_c_"+sIdAction
else:
sCondition = ""
# Action
cAction = m.group(1)
sAction = sAction[m.end():].strip()
sAction = changeReferenceToken(sAction, dPos)
iStartAction = int(m.group(2)) if m.group(2) else 0
iEndAction = int(m.group(3)[1:]) if m.group(3) else iStartAction
if nGroup:
try:
iStartAction = dPos[iStartAction]
iEndAction = dPos[iEndAction]
except:
print("# Error. Wrong groups in: " + sIdAction)
if cAction == "-":
## error
iMsg = sAction.find(" # ")
if iMsg == -1:
sMsg = "# Error. Error message not found."
sURL = ""
print(sMsg + " Action id: " + sIdAction)
else:
sMsg = sAction[iMsg+3:].strip()
sAction = sAction[:iMsg].strip()
sURL = ""
mURL = re.search("[|] *(https?://.*)", sMsg)
if mURL:
sURL = mURL.group(1).strip()
sMsg = sMsg[:mURL.start(0)].strip()
if sMsg[0:1] == "=":
sMsg = prepareFunction(sMsg[1:])
lFUNCTIONS.append(("g_m_"+sIdAction, sMsg))
for x in re.finditer("group[(](\\d+)[)]", sMsg):
if int(x.group(1)) > nGroup:
print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
sMsg = "=g_m_"+sIdAction
else:
for x in re.finditer(r"\\(\d+)", sMsg):
if int(x.group(1)) > nGroup:
print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
if re.search("[.]\\w+[(]", sMsg):
print("# Error in message at line " + sIdAction + ": This message looks like code. Line should begin with =")
if sAction[0:1] == "=" or cAction == "=":
if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
sAction = prepareFunction(sAction)
for x in re.finditer("group[(](\\d+)[)]", sAction):
if int(x.group(1)) > nGroup:
print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
else:
for x in re.finditer(r"\\(\d+)", sAction):
if int(x.group(1)) > nGroup:
print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
if re.search("[.]\\w+[(]|sugg\\w+[(]", sAction):
print("# Error in action at line " + sIdAction + ": This action looks like code. Line should begin with =")
if cAction == "-":
## error detected --> suggestion
if not sAction:
print("# Error in action at line " + sIdAction + ": This action is empty.")
if sAction[0:1] == "=":
lFUNCTIONS.append(("g_s_"+sIdAction, sAction[1:]))
sAction = "=g_s_"+sIdAction
elif sAction.startswith('"') and sAction.endswith('"'):
sAction = sAction[1:-1]
if not sMsg:
print("# Error in action at line " + sIdAction + ": The message is empty.")
return [sCondition, cAction, sAction, iStartAction, iEndAction, nPriority, sMsg, sURL]
elif cAction == "~":
## text processor
if not sAction:
print("# Error in action at line " + sIdAction + ": This action is empty.")
if sAction[0:1] == "=":
lFUNCTIONS.append(("g_p_"+sIdAction, sAction[1:]))
sAction = "=g_p_"+sIdAction
elif sAction.startswith('"') and sAction.endswith('"'):
sAction = sAction[1:-1]
return [sCondition, cAction, sAction, iStartAction, iEndAction]
elif cAction == "=":
## disambiguator
if sAction[0:1] == "=":
sAction = sAction[1:]
if not sAction:
print("# Error in action at line " + sIdAction + ": This action is empty.")
lFUNCTIONS.append(("g_d_"+sIdAction, sAction))
sAction = "g_d_"+sIdAction
return [sCondition, cAction, sAction]
elif cAction == ">":
## no action, break loop if condition is False
return [sCondition, cAction, ""]
else:
print("# Unknown action at line " + sIdAction)
return None
def make (spLang, sLang, bJavaScript):
"compile rules, returns a dictionary of values"
# for clarity’s sake, don’t create any file here
print("> read graph rules file...")
try:
lRules = open(spLang + "/rules_graph.grx", 'r', encoding="utf-8").readlines()
except:
print("Error. Rules file in project [" + sLang + "] not found.")
exit()
# removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
print(" parsing rules...")
global dDEF
lLine = []
lRuleLine = []
lTest = []
lOpt = []
lTokenLine = []
sActions = ""
nPriority = 4
for i, sLine in enumerate(lRules, 1):
sLine = sLine.rstrip()
if "\t" in sLine:
print("Error. Tabulation at line: ", i)
break
if sLine.startswith('#END'):
printBookmark(0, "BREAK BY #END", i)
break
elif sLine.startswith("#"):
pass
elif sLine.startswith("DEF:"):
m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
if m:
dDEF["{"+m.group(1)+"}"] = m.group(2)
else:
print("Error in definition: ", end="")
print(sLine.strip())
elif sLine.startswith("TEST:"):
lTest.append("g{:<7}".format(i) + " " + sLine[5:].strip())
elif sLine.startswith("TODO:"):
pass
elif sLine.startswith("!!"):
m = re.search("^!!+", sLine)
nExMk = len(m.group(0))
if sLine[nExMk:].strip():
printBookmark(nExMk-2, sLine[nExMk:].strip(), i)
elif sLine.startswith("__") and sLine.endswith("__"):
# new rule group
m = re.match("__(\\w+)(!\\d|)__", sLine)
if m:
sRuleName = m.group(1)
nPriority = int(m.group(2)[1:]) if m.group(2) else 4
else:
print("Error at rule group: ", sLine, " -- line:", i)
break
elif re.match("[ ]*$", sLine):
# empty line to end merging
for i, sTokenLine in lTokenLine:
lRuleLine.append((i, sRuleName, sTokenLine, sActions, nPriority))
lTokenLine = []
sActions = ""
sRuleName = ""
nPriority = 4
elif sLine.startswith((" ")):
# actions
sActions += " " + sLine.strip()
else:
lTokenLine.append([i, sLine.strip()])
# tests
print(" list tests...")
sGCTests = "\n".join(lTest)
sGCTestsJS = '{ "aData2": ' + json.dumps(lTest, ensure_ascii=False) + " }\n"
# processing rules
print(" preparing rules...")
lPreparedRule = []
for i, sRuleGroup, sTokenLine, sActions, nPriority in lRuleLine:
for lRule in createRule(i, sRuleGroup, sTokenLine, sActions, nPriority):
lPreparedRule.append(lRule)
# Graph creation
for e in lPreparedRule:
print(e)
oDARG = darg.DARG(lPreparedRule, sLang)
oRuleGraph = oDARG.createGraph()
# creating file with all functions callable by rules
print(" creating callables...")
sPyCallables = "# generated code, do not edit\n"
#sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
for sFuncName, sReturn in lFUNCTIONS:
if sFuncName.startswith("g_c_"): # condition
sParams = "lToken, sCountry, bCondMemo"
elif sFuncName.startswith("g_m_"): # message
sParams = "lToken"
elif sFuncName.startswith("g_s_"): # suggestion
sParams = "lToken"
elif sFuncName.startswith("g_p_"): # preprocessor
sParams = "lToken"
elif sFuncName.startswith("g_d_"): # disambiguator
sParams = "lToken"
else:
print("# Unknown function type in [" + sFuncName + "]")
continue
sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
sPyCallables += " return " + sReturn + "\n"
#sJSCallables += " {}: function ({})".format(sFuncName, sParams) + " {\n"
#sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
#sJSCallables += " },\n"
#sJSCallables += "}\n"
# Result
d = {
"graph_callables": sPyCallables,
"graph_gctests": sGCTests,
"rules_graph": oRuleGraph,
"rules_actions": dACTIONS
}
return d
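For reference, make() above expects rules_graph.grx entries shaped like the following hypothetical rule: a __name!priority__ header, one token line per rule (brackets list token alternatives, parentheses mark a selected group), indented action lines introduced by <<- , and a blank line to close the group.

__conf_une_exemple!3__
une [exemple|aperçu]
    <<- morph(\2, ":N:m") -1>> un    # Hypothetical message: gender disagreement.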
Added darg.py version [589e9bbc94].
#!python3
# RULE GRAPH BUILDER
#
# by Olivier R.
# License: MPL 2
import sys
import json
import time
import traceback
from graphspell.progressbar import ProgressBar
class DARG:
"""DIRECT ACYCLIC RULE GRAPH"""
# This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
def __init__ (self, lRule, sLangCode):
print("===== Direct Acyclic Rule Graph - Minimal Acyclic Finite State Automaton =====")
# Preparing DARG
print(" > Preparing list of tokens")
self.sLangCode = sLangCode
self.nRule = len(lRule)
self.aPreviousRule = []
Node.resetNextId()
self.oRoot = Node()
self.lUncheckedNodes = [] # list of nodes that have not been checked for duplication.
self.lMinimizedNodes = {} # dict of unique nodes that have been checked for duplication (node -> node).
self.nNode = 0
self.nArc = 0
# build
lRule.sort()
oProgBar = ProgressBar(0, len(lRule))
for aRule in lRule:
self.insert(aRule)
oProgBar.increment(1)
oProgBar.done()
self.finish()
self.countNodes()
self.countArcs()
self.displayInfo()
# BUILD DARG
def insert (self, aRule):
if aRule < self.aPreviousRule:
sys.exit("# Error: tokens must be inserted in order.")
# find common prefix between word and previous word
nCommonPrefix = 0
for i in range(min(len(aRule), len(self.aPreviousRule))):
if aRule[i] != self.aPreviousRule[i]:
break
nCommonPrefix += 1
# Check the lUncheckedNodes for redundant nodes, proceeding from last
# one down to the common prefix size. Then truncate the list at that point.
self._minimize(nCommonPrefix)
# add the suffix, starting from the correct node mid-way through the graph
if len(self.lUncheckedNodes) == 0:
oNode = self.oRoot
else:
oNode = self.lUncheckedNodes[-1][2]
iToken = nCommonPrefix
for sToken in aRule[nCommonPrefix:]:
oNextNode = Node()
oNode.dArcs[sToken] = oNextNode
self.lUncheckedNodes.append((oNode, sToken, oNextNode))
if iToken == (len(aRule) - 2):
oNode.bFinal = True
iToken += 1
oNode = oNextNode
oNode.bFinal = True
self.aPreviousRule = aRule
def finish (self):
"minimize unchecked nodes"
self._minimize(0)
def _minimize (self, downTo):
# proceed from the leaf up to a certain point
for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ):
oNode, sToken, oChildNode = self.lUncheckedNodes[i]
if oChildNode in self.lMinimizedNodes:
# replace the child with the previously encountered one
oNode.dArcs[sToken] = self.lMinimizedNodes[oChildNode]
else:
# add the state to the minimized nodes.
self.lMinimizedNodes[oChildNode] = oChildNode
self.lUncheckedNodes.pop()
def countNodes (self):
self.nNode = len(self.lMinimizedNodes)
def countArcs (self):
self.nArc = 0
for oNode in self.lMinimizedNodes:
self.nArc += len(oNode.dArcs)
def displayInfo (self):
print(" * {:<12} {:>16,}".format("Rules:", self.nRule))
print(" * {:<12} {:>16,}".format("Nodes:", self.nNode))
print(" * {:<12} {:>16,}".format("Arcs:", self.nArc))
def createGraph (self):
dGraph = { 0: self.oRoot.getNodeAsDict() }
print(0, "\t", self.oRoot.getNodeAsDict())
for oNode in self.lMinimizedNodes:
sHashId = oNode.__hash__()
if sHashId not in dGraph:
dGraph[sHashId] = oNode.getNodeAsDict()
print(sHashId, "\t", dGraph[sHashId])
else:
print("Error. Double node… same id: ", sHashId)
print(str(oNode.getNodeAsDict()))
return dGraph
class Node:
NextId = 0
def __init__ (self):
self.i = Node.NextId
Node.NextId += 1
self.bFinal = False
self.dArcs = {} # key: arc value; value: a node
@classmethod
def resetNextId (cls):
cls.NextId = 0
def __str__ (self):
# Caution! this function is used for hashing and comparison!
cFinal = "1" if self.bFinal else "0"
l = [cFinal]
for (key, oNode) in self.dArcs.items():
l.append(str(key))
l.append(str(oNode.i))
return "_".join(l)
def __hash__ (self):
# Used as a key in a python dictionary.
return self.__str__().__hash__()
def __eq__ (self, other):
# Used as a key in a python dictionary.
# Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
return self.__str__() == other.__str__()
def getNodeAsDict (self):
"returns the node as a dictionary structure"
dNode = {}
dReValue = {}
dReMorph = {}
dRules = {}
dLemmas = {}
for sArc, oNode in self.dArcs.items():
if sArc.startswith("~~") and len(sArc) > 2:
dReMorph[sArc[1:]] = oNode.__hash__()
elif sArc.startswith("~") and len(sArc) > 1:
dReValue[sArc[1:]] = oNode.__hash__()
elif sArc.startswith(">") and len(sArc) > 1:
dLemmas[sArc[1:]] = oNode.__hash__()
elif sArc.startswith("##"):
dRules[sArc[1:]] = oNode.__hash__()
else:
dNode[sArc] = oNode.__hash__()
if dReValue:
dNode["<re_value>"] = dReValue
if dReMorph:
dNode["<re_morph>"] = dReMorph
if dLemmas:
dNode["<lemmas>"] = dLemmas
if dRules:
dNode["<rules>"] = dRules
#if self.bFinal:
# dNode["<final>"] = 1
return dNode
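A minimal sketch of how the builder drives this class, following compile_rules_graph.make above; each prepared rule is a list of tokens ending with a "##line" marker and an action id, and the node hashes in the result are whatever Node.__hash__ yields:

import darg

lPreparedRule = [
    ["ce", "peut", "##12", "conf_ce_peut_a0"],
    ["ce", "pourrait", "##13", "conf_ce_peut_a0"],
]
oDARG = darg.DARG(lPreparedRule, "fr")   # sorts, inserts, minimizes
dGraph = oDARG.createGraph()             # {0: root arcs, <hash>: node arcs, ...}
# getNodeAsDict regroups prefixed arcs per node under the special keys
# "<re_value>", "<re_morph>", "<lemmas>" and "<rules>"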
Modified gc_core/js/lang_core/gc_engine.js from [7ee1350cd7] to [12095116ac].
︙
// data
let _sAppContext = ""; // what software is running
let _dOptions = null;
let _aIgnoredRules = new Set();
let _oSpellChecker = null;
var gc_engine = {
//// Informations
lang: "${lang}",
︙
var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js");
_oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
} else {
_oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_main_filename_js}", "${dic_extended_filename_js}", "${dic_community_filename_js}", "${dic_personal_filename_js}");
}
_sAppContext = sContext;
_dOptions = gc_options.getOptions(sContext).gl_shallowCopy(); // duplication necessary, to be able to reset to default
_oSpellChecker.activateStorage();
}
catch (e) {
helpers.logerror(e);
}
},
getSpellChecker: function () {
︙
function displayInfo (dDA, aWord) {
// for debugging: info of word
if (!aWord) {
helpers.echo("> nothing to find");
return true;
}
let lMorph = _oSpellChecker.getMorph(aWord[1]);
if (lMorph.length === 0) {
helpers.echo("> not in dictionary");
return true;
}
if (dDA.has(aWord[0])) {
helpers.echo("DA: " + dDA.get(aWord[0]));
}
helpers.echo("FSA: " + lMorph);
return true;
}
function morph (dDA, aWord, sPattern, bStrict=true, bNoWord=false) {
// analyse a tuple (position, word), return true if sPattern in morphologies (disambiguation on)
if (!aWord) {
//helpers.echo("morph: noword, returns " + bNoWord);
return bNoWord;
}
//helpers.echo("aWord: "+aWord.toString());
let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _oSpellChecker.getMorph(aWord[1]);
//helpers.echo("lMorph: "+lMorph.toString());
if (lMorph.length === 0) {
return false;
}
//helpers.echo("***");
if (bStrict) {
return lMorph.every(s => (s.search(sPattern) !== -1));
}
return lMorph.some(s => (s.search(sPattern) !== -1));
}
function morphex (dDA, aWord, sPattern, sNegPattern, bNoWord=false) {
// analyse a tuple (position, word), returns true if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)
if (!aWord) {
//helpers.echo("morph: noword, returns " + bNoWord);
return bNoWord;
}
//helpers.echo("aWord: "+aWord.toString());
let lMorph = dDA.has(aWord[0]) ? dDA.get(aWord[0]) : _oSpellChecker.getMorph(aWord[1]);
//helpers.echo("lMorph: "+lMorph.toString());
if (lMorph.length === 0) {
return false;
}
//helpers.echo("***");
// check negative condition
if (lMorph.some(s => (s.search(sNegPattern) !== -1))) {
return false;
}
// search sPattern
return lMorph.some(s => (s.search(sPattern) !== -1));
}
function analyse (sWord, sPattern, bStrict=true) {
// analyse a word, return true if sPattern in morphologies (disambiguation off)
let lMorph = _oSpellChecker.getMorph(sWord);
if (lMorph.length === 0) {
return false;
}
if (bStrict) {
return lMorph.every(s => (s.search(sPattern) !== -1));
}
return lMorph.some(s => (s.search(sPattern) !== -1));
}
function analysex (sWord, sPattern, sNegPattern) {
// analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)
let lMorph = _oSpellChecker.getMorph(sWord);
if (lMorph.length === 0) {
return false;
}
// check negative condition
if (lMorph.some(s => (s.search(sNegPattern) !== -1))) {
return false;
}
// search sPattern
return lMorph.some(s => (s.search(sPattern) !== -1));
}
//// functions to get text outside pattern scope
// warning: check compile_rules.py to understand how it works
︙
function select (dDA, nPos, sWord, sPattern, lDefault=null) {
if (!sWord) {
return true;
}
if (dDA.has(nPos)) {
return true;
}
let lMorph = _oSpellChecker.getMorph(sWord);
if (lMorph.length === 0 || lMorph.length === 1) {
return true;
}
let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) !== -1 );
if (lSelect.length > 0) {
if (lSelect.length != lMorph.length) {
dDA.set(nPos, lSelect);
}
} else if (lDefault) {
dDA.set(nPos, lDefault);
}
return true;
}
function exclude (dDA, nPos, sWord, sPattern, lDefault=null) {
if (!sWord) {
return true;
}
if (dDA.has(nPos)) {
return true;
}
let lMorph = _oSpellChecker.getMorph(sWord);
if (lMorph.length === 0 || lMorph.length === 1) {
return true;
}
let lSelect = lMorph.filter( sMorph => sMorph.search(sPattern) === -1 );
if (lSelect.length > 0) {
if (lSelect.length != lMorph.length) {
dDA.set(nPos, lSelect);
}
} else if (lDefault) {
dDA.set(nPos, lDefault);
}
return true;
}
︙
Modified gc_core/py/lang_core/gc_engine.py from [72ecd7c680] to [db19c73d82].
︙
#import unicodedata
from itertools import chain
from ..graphspell.spellchecker import SpellChecker
from ..graphspell.echo import echo
from . import gc_options
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph, dRule
__all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
"load", "parse", "getSpellChecker", \
"setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
"ignoreRule", "resetIgnoreRules", "reactivateRule", "listRules", "displayRules" ]
__version__ = "${version}"
︙
_rules = None # module gc_rules
# data
_sAppContext = "" # what software is running
_dOptions = None
_aIgnoredRules = set()
_oSpellChecker = None
_oTokenizer = None
#### Parsing
def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
"analyses the paragraph sText and returns list of errors"
#sText = unicodedata.normalize("NFC", sText)
aErrors = None
sRealText = sText
dDA = {} # Disambiguator. Key = position; value = list of morphologies
dPriority = {} # Key = position; value = priority
dOpt = _dOptions if not dOptions else dOptions
# parse paragraph
try:
sNew, aErrors = _proofread(sText, sRealText, 0, True, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
if sNew:
sText = sNew
except:
raise
# cleanup
if " " in sText:
sText = sText.replace(" ", ' ') # nbsp
if " " in sText:
sText = sText.replace(" ", ' ') # nnbsp
if "'" in sText:
sText = sText.replace("'", "’")
if "‑" in sText:
sText = sText.replace("‑", "-") # nobreakdash
# parse sentences
for iStart, iEnd in _getSentenceBoundaries(sText):
if 4 < (iEnd - iStart) < 2000:
dDA.clear()
try:
# regex parser
_, errs = _proofread(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dDA, dPriority, sCountry, dOpt, bDebug, bContext)
aErrors.update(errs)
# token parser
oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, dPriority, sCountry, dOpt, bDebug, bContext)
oSentence.parse()
except:
raise
return aErrors.values() # this is a view (iterable)
def _getSentenceBoundaries (sText):
iStart = _zBeginOfParagraph.match(sText).end()
︙
_createError = _createDictError
def load (sContext="Python"):
global _oSpellChecker
global _sAppContext
global _dOptions
global _oTokenizer
try:
_oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_extended_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
_sAppContext = sContext
_dOptions = dict(gc_options.getOptions(sContext)) # duplication necessary, to be able to reset to default
_oTokenizer = _oSpellChecker.getTokenizer()
_oSpellChecker.activateStorage()
except:
traceback.print_exc()
def setOption (sOpt, bVal):
if sOpt in _dOptions:
_dOptions[sOpt] = bVal
︙
return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py")
#### common functions
# common regexes
_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")
_zNextWord = re.compile(r" +(\w[\w-]*)")
_zPrevWord = re.compile(r"(\w[\w-]*) +$")
def option (sOpt):
"return True if option sOpt is active"
return _dOptions.get(sOpt, False)
def displayInfo (dDA, tWord):
"for debugging: retrieve info of word"
if not tWord:
echo("> nothing to find")
return True
lMorph = _oSpellChecker.getMorph(tWord[1])
if not lMorph:
echo("> not in dictionary")
return True
if tWord[0] in dDA:
echo("DA: " + str(dDA[tWord[0]]))
echo("FSA: " + str(lMorph))
return True
def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
"analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
if not tWord:
return bNoWord
lMorph = dDA[tWord[0]] if tWord[0] in dDA else _oSpellChecker.getMorph(tWord[1])
if not lMorph:
return False
p = re.compile(sPattern)
if bStrict:
return all(p.search(s) for s in lMorph)
return any(p.search(s) for s in lMorph)
def morphex (dDA, tWord, sPattern, sNegPattern, bNoWord=False):
"analyse a tuple (position, word), returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation on)"
if not tWord:
return bNoWord
lMorph = dDA[tWord[0]] if tWord[0] in dDA else _oSpellChecker.getMorph(tWord[1])
if not lMorph:
return False
# check negative condition
np = re.compile(sNegPattern)
if any(np.search(s) for s in lMorph):
return False
# search sPattern
p = re.compile(sPattern)
return any(p.search(s) for s in lMorph)
def analyse (sWord, sPattern, bStrict=True):
"analyse a word, return True if sPattern in morphologies (disambiguation off)"
lMorph = _oSpellChecker.getMorph(sWord)
if not lMorph:
return False
p = re.compile(sPattern)
if bStrict:
return all(p.search(s) for s in lMorph)
return any(p.search(s) for s in lMorph)
def analysex (sWord, sPattern, sNegPattern):
"analyse a word, returns True if not sNegPattern in word morphologies and sPattern in word morphologies (disambiguation off)"
lMorph = _oSpellChecker.getMorph(sWord)
if not lMorph:
return False
# check negative condition
np = re.compile(sNegPattern)
if any(np.search(s) for s in lMorph):
return False
# search sPattern
p = re.compile(sPattern)
return any(p.search(s) for s in lMorph)
## functions to get text outside pattern scope
# warning: check compile_rules.py to understand how it works
def nextword (s, iStart, n):
︙
#### Disambiguator
def select (dDA, nPos, sWord, sPattern, lDefault=None):
if not sWord:
return True
if nPos in dDA:
return True
lMorph = _oSpellChecker.getMorph(sWord)
if not lMorph or len(lMorph) == 1:
return True
lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
if lSelect:
if len(lSelect) != len(lMorph):
dDA[nPos] = lSelect
elif lDefault:
dDA[nPos] = lDefault
return True
def exclude (dDA, nPos, sWord, sPattern, lDefault=None):
if not sWord:
return True
if nPos in dDA:
return True
lMorph = _oSpellChecker.getMorph(sWord)
if not lMorph or len(lMorph) == 1:
return True
lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
if lSelect:
if len(lSelect) != len(lMorph):
dDA[nPos] = lSelect
elif lDefault:
dDA[nPos] = lDefault
return True
def define (dDA, nPos, lMorph):
dDA[nPos] = lMorph
return True
#### GRAMMAR CHECKER PLUGINS
${plugins}
#### CALLABLES (generated code)
${callables}
#### TOKEN SENTENCE CHECKER
class TokenSentence:
def __init__ (self, sSentence, sSentence0, iStart, dPriority, sCountry, dOpt, bDebug, bContext):
self.sSentence = sSentence
self.sSentence0 = sSentence0
self.iStart = iStart
self.lToken = list(_oTokenizer.genTokens(sSentence))
def parse (self):
dErr = {}
lPointer = []
for dToken in self.lToken:
for i, dPointer in enumerate(lPointer):
bValid = False
for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
dPointer["nOffset"] = dToken["i"]
dPointer["dNode"] = dNode
bValid = True
if not bValid:
del lPointer[i]
for dNode in self._getNextMatchingNodes(dToken, dGraph):
lPointer.append({"nOffset": 0, "dNode": dNode})
for dPointer in lPointer:
if "<rules>" in dPointer["dNode"]:
for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
dErr = self._executeActions(dNode, self.iStart)
return dErr
def _getNextMatchingNodes (self, dToken, dNode):
# token value
if dToken["sValue"] in dNode:
yield dGraph[dNode[dToken["sValue"]]]
# token lemmas
if "<lemmas>" in dNode:
for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
if sLemma in dNode["<lemmas>"]:
yield dGraph[dNode["<lemmas>"][sLemma]]
# universal arc
if "*" in dNode:
yield dGraph[dNode["*"]]
# regex value arcs
if "<re_value>" in dNode:
for sRegex in dNode["<re_value>"]:
if re.search(sRegex, dToken["sValue"]):
yield dGraph[dNode["<re_value>"][sRegex]]
# regex morph arcs
if "<re_morph>" in dNode:
for sRegex in dNode["<re_morph>"]:
for sMorph in _oSpellChecker.getMorph(dToken["sValue"]):
if re.search(sRegex, sMorph):
yield dGraph[dNode["<re_morph>"][sRegex]]
def _executeActions (self, dNode, nOffset):
for sLineId, nextNodeKey in dNode.items():
for sArc in dGraph[nextNodeKey]:
print(sArc)
bCondMemo = None
sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
# action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
try:
bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
if bCondMemo:
if cActionType == "-":
# grammar error
print("-")
nErrorStart = nSentenceOffset + m.start(eAct[0])
nErrorEnd = nSentenceOffset + m.start(eAct[1])
if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
dPriority[nErrorStart] = nPriority
elif cActionType == "~":
# text processor
print("~")
self._rewrite(sWhat, nErrorStart, nErrorEnd)
elif cActionType == "@":
# jump
print("@")
self._jump(sWhat)
elif cActionType == "=":
# disambiguation
print("=")
globals()[sWhat](self.lToken)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
print(">")
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
def _createWriterError (self):
d = {}
return d
def _createDictError (self):
d = {}
return d
def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
"text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
lTokenValue = sWhat.split("|")
if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
print("Error. Text processor: number of replacements != number of tokens.")
return
for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
self.lToken[i]["sValue"] = sValue
def _jump (self, sWhat):
try:
nFrom, nTo = sWhat.split(">")
self.lToken[int(nFrom)]["iJump"] = int(nTo)
except:
print("# Error. Jump failed: ", sWhat)
traceback.print_exc()
return
#### Analyse tokens
def g_morph (dToken, sPattern, bStrict=True):
"analyse a token, return True if <sPattern> in morphologies"
if "lMorph" in dToken:
lMorph = dToken["lMorph"]
else:
lMorph = _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph:
return False
zPattern = re.compile(sPattern)
if bStrict:
return all(zPattern.search(sMorph) for sMorph in lMorph)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_morphex (dToken, sPattern, sNegPattern):
"analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
if "lMorph" in dToken:
lMorph = dToken["lMorph"]
else:
lMorph = _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph:
return False
# check negative condition
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_analyse (dToken, sPattern, bStrict=True):
"analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
lMorph = _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph:
return False
zPattern = re.compile(sPattern)
if bStrict:
return all(zPattern.search(sMorph) for sMorph in lMorph)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_analysex (dToken, sPattern, sNegPattern):
"analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
lMorph = _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph:
return False
# check negative condition
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
#### Go outside the rule scope
def g_nextToken (i):
pass
def g_prevToken (i):
pass
def g_look ():
pass
def g_lookAndCheck ():
pass
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph or len(lMorph) == 1:
return True
lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
if lSelect:
if len(lSelect) != len(lMorph):
dToken["lMorph"] = lSelect
elif lDefault:
dToken["lMorph"] = lDefault
return True
def g_exclude (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
lMorph = dToken["lMorph"] if "lMorph" in dToken else _oSpellChecker.getMorph(dToken["sValue"])
if not lMorph or len(lMorph) == 1:
return True
lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
if lSelect:
if len(lSelect) != len(lMorph):
dToken["lMorph"] = lSelect
elif lDefault:
dToken["lMorph"] = lDefault
return True
def g_define (dToken, lMorph):
"set morphologies of <dToken>, always return True"
dToken["lMorph"] = lMorph
return True
#### CALLABLES (generated code)
${graph_callables}
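The engine’s entry points are unchanged by this migration; parse() simply runs the token-based pass after the regex pass on each sentence. A minimal usage sketch, assuming a built French package with the ${...} placeholders substituted:

import gc_engine

gc_engine.load()
for dErr in gc_engine.parse("Les chat noir dorment."):
    print(dErr)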
Added gc_core/py/lang_core/gc_rules_graph.py version [e9a58f5498].
# generated code, do not edit
dGraph = ${rules_graph}
dRule = ${rules_actions}
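After substitution, dGraph holds the node dictionaries produced by darg.DARG.createGraph() keyed by node hash, and dRule maps action ids to the lists built by compile_rules_graph.createAction. A hypothetical filled-in instance (hash values invented):

dGraph = {
    0: { "ce": 1001 },                      # root: arc on the token value "ce"
    1001: { "peut": 1002, "pourrait": 1002 },
    1002: { "<rules>": { "#12": 1003 } },   # rule arcs grouped by line id
    1003: { "conf_ce_peut_a0": 0 },         # action id arc to a final node
}
dRule = {
    # [condition, action type, replacement, iStart, iEnd, priority, message, URL]
    "conf_ce_peut_a0": ["g_c_conf_ce_peut_a0", "-", "pourrait", 1, 1, 4, "Conjugaison douteuse.", ""],
}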
Added gc_core/py/lang_core/gc_sentence.py version [c68dc1622f].
# Sentence checker
from ..graphspell.tokenizer import Tokenizer
from .gc_rules_graph import dGraph
oTokenizer = Tokenizer("${lang}")
class TokenSentence:
def __init__ (self, sSentence, sSentence0, nOffset):
self.sSentence = sSentence
self.sSentence0 = sSentence0
self.nOffset = nOffset
self.lToken = list(oTokenizer.genTokens(sSentence))
def parse (self):
dErr = {}
lPointer = []
for dToken in self.lToken:
for i, dPointer in enumerate(lPointer):
bValid = False
for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
dPointer["nOffset"] = dToken["i"]
dPointer["dNode"] = dNode
bValid = True
if not bValid:
del lPointer[i]
for dNode in self._getNextMatchingNodes(dToken, dGraph):
lPointer.append({"nOffset": 0, "dNode": dNode})
for dPointer in lPointer:
if "<rules>" in dPointer["dNode"]:
for dNode in dGraph[dPointer["dNode"]["<rules>"]]:
dErr = self._executeActions(dNode, self.nOffset)
return dErr
def _getNextMatchingNodes (self, dToken, dNode):
# token value
if dToken["sValue"] in dNode:
yield dGraph[dNode[dToken["sValue"]]]
# token lemmas
for sLemma in dToken["lLemma"]:
if sLemma in dNode:
yield dGraph[dNode[sLemma]]
# universal arc
if "*" in dNode:
yield dGraph[dNode["*"]]
# regex arcs
if "~" in dNode:
for sRegex in dNode["~"]:
for sMorph in dToken["lMorph"]:
if re.search(sRegex, sMorph):
yield dGraph[dNode["~"][sRegex]]
def _executeActions (self, dNode, nOffset):
for sLineId, nextNodeKey in dNode.items():
for sArc in dGraph[nextNodeKey]:
bCondMemo = None
sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
# action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
try:
bCondMemo = not sFuncCond or globals()[sFuncCond](self, sCountry, bCondMemo)
if bCondMemo:
if cActionType == "-":
# grammar error
nErrorStart = nSentenceOffset + m.start(eAct[0])
nErrorEnd = nSentenceOffset + m.start(eAct[1])
if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
dPriority[nErrorStart] = nPriority
elif cActionType == "~":
# text processor
self._rewrite(sWhat, nErrorStart, nErrorEnd)
elif cActionType == "@":
# jump
self._jump(sWhat)
elif cActionType == "=":
# disambiguation
globals()[sWhat](self.lToken)
elif cActionType == ">":
# we do nothing, this test is just a condition to apply all following actions
pass
else:
print("# error: unknown action at " + sLineId)
elif cActionType == ">":
break
except Exception as e:
raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
def _createWriterError (self):
d = {}
return d
def _createDictError (self):
d = {}
return d
def _rewrite (self, sWhat, nErrorStart, nErrorEnd):
"text processor: rewrite tokens between <nErrorStart> and <nErrorEnd> position"
lTokenValue = sWhat.split("|")
if len(lTokenValue) != (nErrorEnd - nErrorStart + 1):
print("Error. Text processor: number of replacements != number of tokens.")
return
for i, sValue in zip(range(nErrorStart, nErrorEnd+1), lTokenValue):
self.lToken[i]["sValue"] = sValue
def _jump (self, sWhat):
try:
nFrom, nTo = sWhat.split(">")
self.lToken[int(nFrom)]["iJump"] = int(nTo)
except:
print("# Error. Jump failed: ", sWhat)
traceback.print_exc()
return
#### Analyse tokens
def g_morph (dToken, sPattern, bStrict=True):
"analyse a token, return True if <sPattern> in morphologies"
if "lMorph" in dToken:
lMorph = dToken["lMorph"]
else:
if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
return False
if not _dAnalyses[dToken["sValue"]]:
return False
lMorph = _dAnalyses[dToken["sValue"]]
zPattern = re.compile(sPattern)
if bStrict:
return all(zPattern.search(sMorph) for sMorph in lMorph)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_morphex (dToken, sPattern, sNegPattern):
"analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
if "lMorph" in dToken:
lMorph = dToken["lMorph"]
else:
if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
return False
if not _dAnalyses[dToken["sValue"]]:
return False
lMorph = _dAnalyses[dToken["sValue"]]
# check negative condition
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in lMorph):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in lMorph)
def g_analyse (dToken, sPattern, bStrict=True):
"analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
return False
if not _dAnalyses[dToken["sValue"]]:
return False
zPattern = re.compile(sPattern)
if bStrict:
return all(zPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]])
return any(zPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]])
def g_analysex (dToken, sPattern, sNegPattern):
"analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
return False
if not _dAnalyses[dToken["sValue"]]:
return False
# check negative condition
zNegPattern = re.compile(sNegPattern)
if any(zNegPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]]):
return False
# search sPattern
zPattern = re.compile(sPattern)
return any(zPattern.search(sMorph) for sMorph in _dAnalyses[dToken["sValue"]])
#### Go outside the rule scope
def g_nextToken (i):
pass
def g_prevToken (i):
pass
def g_look ():
pass
def g_lookAndCheck ():
pass
#### Disambiguator
def g_select (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
return True
if len(_dAnalyses[dToken["sValue"]]) == 1:
return True
lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
lSelect = [ sMorph for sMorph in lMorph if re.search(sPattern, sMorph) ]
if lSelect:
if len(lSelect) != len(lMorph):
dToken["lMorph"] = lSelect
elif lDefault:
dToken["lMorph"] = lDefault
return True
def g_exclude (dToken, sPattern, lDefault=None):
"select morphologies for <dToken> according to <sPattern>, always return True"
if dToken["sValue"] not in _dAnalyses and not _storeMorphFromFSA(dToken["sValue"]):
return True
if len(_dAnalyses[dToken["sValue"]]) == 1:
return True
lMorph = dToken["lMorph"] or _dAnalyses[dToken["sValue"]]
lSelect = [ sMorph for sMorph in lMorph if not re.search(sPattern, sMorph) ]
if lSelect:
if len(lSelect) != len(lMorph):
dToken["lMorph"] = lSelect
elif lDefault:
dToken["lMorph"] = lDefault
return True
def g_define (dToken, lMorph):
"set morphologies of <dToken>, always return True"
dToken["lMorph"] = lMorph
return True
#### CALLABLES (generated code)
${graph_callables}
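gc_sentence.py packages the same TokenSentence machinery as a standalone module; a minimal sketch of its intended use, assuming a built French package:

from gc_core.py.lang_core.gc_sentence import TokenSentence

oSentence = TokenSentence("Les chat dorment.", "Les chat dorment.", 0)
dErr = oSentence.parse()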
Modified gc_lang/fr/modules-js/gce_analyseur.js from [e2613ddcd2] to [bdc2b54804].
︙
if (s2 == "vous") {
return "vous";
}
if (s2 == "eux") {
return "ils";
}
if (s2 == "elle" || s2 == "elles") {
if (cregex.mbNprMasNotFem(_oSpellChecker.getMorph(s1))) {
return "ils";
}
// if epicene: undecidable, but no matter, the feminine wins
return "elles";
}
return s1 + " et " + s2;
}
function apposition (sWord1, sWord2) {
// returns true if nom + nom (no agreement required)
return cregex.mbNomNotAdj(_oSpellChecker.getMorph(sWord2)) && cregex.mbPpasNomNotAdj(_oSpellChecker.getMorph(sWord1));
}
function isAmbiguousNAV (sWord) {
// words which are nom|adj and verb are ambiguous (except être and avoir)
let lMorph = _oSpellChecker.getMorph(sWord);
if (lMorph.length === 0) {
return false;
}
if (!cregex.mbNomAdj(lMorph) || sWord == "est") {
return false;
}
if (cregex.mbVconj(lMorph) && !cregex.mbMG(lMorph)) {
return true;
}
return false;
}
function isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj) {
//// use it if sWord1 won’t be a verb; word2 is assumed to be true via isAmbiguousNAV
let a2 = _oSpellChecker.getMorph(sWord2);
if (a2.length === 0) {
return false;
}
if (cregex.checkConjVerb(a2, sReqMorphConj)) {
// verb word2 is ok
return false;
}
let a1 = _oSpellChecker.getMorph(sWord1);
if (a1.length === 0) {
return false;
}
if (cregex.checkAgreement(a1, a2) && (cregex.mbAdj(a2) || cregex.mbAdj(a1))) {
return false;
}
return true;
}
function isVeryAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj, bLastHopeCond) {
//// use it if sWord1 can be also a verb; word2 is assumed to be true via isAmbiguousNAV
let a2 = _oSpellChecker.getMorph(sWord2);
if (a2.length === 0) {
return false;
}
if (cregex.checkConjVerb(a2, sReqMorphConj)) {
// verb word2 is ok
return false;
}
let a1 = _oSpellChecker.getMorph(sWord1);
if (a1.length === 0) {
return false;
}
if (cregex.checkAgreement(a1, a2) && (cregex.mbAdj(a2) || cregex.mbAdjNb(a1))) {
return false;
}
// now, we know there is no agreement, and conjugation is also wrong
if (cregex.isNomAdj(a1)) {
return true;
}
//if cregex.isNomAdjVerb(a1): # considered true
if (bLastHopeCond) {
return true;
}
return false;
}
function checkAgreement (sWord1, sWord2) {
let a2 = _oSpellChecker.getMorph(sWord2);
if (a2.length === 0) {
return true;
}
let a1 = _oSpellChecker.getMorph(sWord1);
if (a1.length === 0) {
return true;
}
return cregex.checkAgreement(a1, a2);
}
function mbUnit (s) {
if (/[µ\/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]/.test(s)) {
︙
Modified gc_lang/fr/modules-js/gce_suggestions.js from [0c31bc1a27] to [7b6a3f8cde].
︙
var phonet = require("resource://grammalecte/fr/phonet.js");
}
//// verbs
function suggVerb (sFlex, sWho, funcSugg2=null) {
let aSugg = new Set();
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
let tTags = conj._getTags(sStem);
if (tTags) {
// we get the tense
let aTense = new Set();
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
let m;
let zVerb = new RegExp (">"+sStem+" .*?(:(?:Y|I[pqsf]|S[pq]|K))", "g");
while ((m = zVerb.exec(sMorph)) !== null) {
// stem must be used in regex to prevent confusion between different verbs (e.g. sauras has 2 stems: savoir and saurer)
if (m) {
if (m[1] === ":Y") {
aTense.add(":Ip");
|
| ︙ | ︙ |
return Array.from(aSugg).join("|");
}
return "";
}
function suggVerbPpas (sFlex, sWhat=null) {
let aSugg = new Set();
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
let tTags = conj._getTags(sStem);
if (tTags) {
if (!sWhat) {
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"));
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"));
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"));
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"));
|
| ︙ | ︙ |
return Array.from(aSugg).join("|");
}
return "";
}
function suggVerbTense (sFlex, sTense, sWho) {
let aSugg = new Set();
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
if (conj.hasConj(sStem, sTense, sWho)) {
aSugg.add(conj.getConj(sStem, sTense, sWho));
}
}
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
return "";
}
function suggVerbImpe (sFlex) {
let aSugg = new Set();
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
let tTags = conj._getTags(sStem);
if (tTags) {
if (conj._hasConjWithTags(tTags, ":E", ":2s")) {
aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2s"));
}
if (conj._hasConjWithTags(tTags, ":E", ":1p")) {
aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":1p"));
}
if (conj._hasConjWithTags(tTags, ":E", ":2p")) {
aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2p"));
}
}
}
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
return "";
}
function suggVerbInfi (sFlex) {
return _oSpellChecker.getLemma(sFlex).filter(sStem => conj.isVerb(sStem)).join("|");
}
const _dQuiEst = new Map ([
["je", ":1s"], ["j’", ":1s"], ["j’en", ":1s"], ["j’y", ":1s"],
["tu", ":2s"], ["il", ":3s"], ["on", ":3s"], ["elle", ":3s"],
["nous", ":1p"], ["vous", ":2p"], ["ils", ":3p"], ["elles", ":3p"]
|
| ︙ | ︙ |
if (!sWho) {
if (sSuj[0].gl_isLowerCase()) { // not a pronoun nor a proper noun
return "";
}
sWho = ":3s";
}
let aSugg = new Set();
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
let tTags = conj._getTags(sStem);
if (tTags) {
for (let sTense of lMode) {
if (conj._hasConjWithTags(tTags, sTense, sWho)) {
aSugg.add(conj._getConjWithTags(sStem, tTags, sTense, sWho));
}
}
}
}
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
return "";
}
//// Nouns and adjectives
function suggPlur (sFlex, sWordToAgree=null) {
// returns plural forms assuming sFlex is singular
if (sWordToAgree) {
let lMorph = _oSpellChecker.getMorph(sWordToAgree);
if (lMorph.length === 0) {
return "";
}
let sGender = cregex.getGender(lMorph);
if (sGender == ":m") {
return suggMasPlur(sFlex);
} else if (sGender == ":f") {
return suggFemPlur(sFlex);
}
}
let aSugg = new Set();
|
| ︙ | ︙ |
return Array.from(aSugg).join("|");
}
return "";
}
function suggMasSing (sFlex, bSuggSimil=false) {
// returns masculine singular forms
let aSugg = new Set();
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (!sMorph.includes(":V")) {
// not a verb
if (sMorph.includes(":m") || sMorph.includes(":e")) {
aSugg.add(suggSing(sFlex));
} else {
let sStem = cregex.getLemmaOfMorph(sMorph);
if (mfsp.isFemForm(sStem)) {
|
| ︙ | ︙ |
return Array.from(aSugg).join("|");
}
return "";
}
function suggMasPlur (sFlex, bSuggSimil=false) {
// returns masculine plural forms
let aSugg = new Set();
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (!sMorph.includes(":V")) {
// not a verb
if (sMorph.includes(":m") || sMorph.includes(":e")) {
aSugg.add(suggPlur(sFlex));
} else {
let sStem = cregex.getLemmaOfMorph(sMorph);
if (mfsp.isFemForm(sStem)) {
|
| ︙ | ︙ |
}
return "";
}
function suggFemSing (sFlex, bSuggSimil=false) {
// returns feminine singular forms
let aSugg = new Set();
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (!sMorph.includes(":V")) {
// not a verb
if (sMorph.includes(":f") || sMorph.includes(":e")) {
aSugg.add(suggSing(sFlex));
} else {
let sStem = cregex.getLemmaOfMorph(sMorph);
if (mfsp.isFemForm(sStem)) {
|
| ︙ | ︙ |
return Array.from(aSugg).join("|");
}
return "";
}
function suggFemPlur (sFlex, bSuggSimil=false) {
// returns feminine plural forms
let aSugg = new Set();
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (!sMorph.includes(":V")) {
// not a verb
if (sMorph.includes(":f") || sMorph.includes(":e")) {
aSugg.add(suggPlur(sFlex));
} else {
let sStem = cregex.getLemmaOfMorph(sMorph);
if (mfsp.isFemForm(sStem)) {
|
| ︙ | ︙ |
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
return "";
}
function hasFemForm (sFlex) {
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
if (mfsp.isFemForm(sStem) || conj.hasConj(sStem, ":PQ", ":Q3")) {
return true;
}
}
if (phonet.hasSimil(sFlex, ":f")) {
return true;
}
return false;
}
function hasMasForm (sFlex) {
for (let sStem of _oSpellChecker.getLemma(sFlex)) {
if (mfsp.isFemForm(sStem) || conj.hasConj(sStem, ":PQ", ":Q1")) {
// what has a feminine form also has a masculine form
return true;
}
}
if (phonet.hasSimil(sFlex, ":m")) {
return true;
}
return false;
}
function switchGender (sFlex, bPlur=null) {
let aSugg = new Set();
if (bPlur === null) {
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (sMorph.includes(":f")) {
if (sMorph.includes(":s")) {
aSugg.add(suggMasSing(sFlex));
} else if (sMorph.includes(":p")) {
aSugg.add(suggMasPlur(sFlex));
}
} else if (sMorph.includes(":m")) {
if (sMorph.includes(":s")) {
aSugg.add(suggFemSing(sFlex));
} else if (sMorph.includes(":p")) {
aSugg.add(suggFemPlur(sFlex));
} else {
aSugg.add(suggFemSing(sFlex));
aSugg.add(suggFemPlur(sFlex));
}
}
}
} else if (bPlur) {
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (sMorph.includes(":f")) {
aSugg.add(suggMasPlur(sFlex));
} else if (sMorph.includes(":m")) {
aSugg.add(suggFemPlur(sFlex));
}
}
} else {
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (sMorph.includes(":f")) {
aSugg.add(suggMasSing(sFlex));
} else if (sMorph.includes(":m")) {
aSugg.add(suggFemSing(sFlex));
}
}
}
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
return "";
}
function switchPlural (sFlex) {
let aSugg = new Set();
for (let sMorph of _oSpellChecker.getMorph(sFlex)) {
if (sMorph.includes(":s")) {
aSugg.add(suggPlur(sFlex));
} else if (sMorph.includes(":p")) {
aSugg.add(suggSing(sFlex));
}
}
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
return "";
}
function hasSimil (sWord, sPattern=null) {
return phonet.hasSimil(sWord, sPattern);
}
function suggSimil (sWord, sPattern=null, bSubst=false) {
// returns a list of words phonetically similar to sWord whose POS matches sPattern
let aSugg = phonet.selectSimil(sWord, sPattern);
for (let sMorph of _oSpellChecker.getMorph(sWord)) {
for (let e of conj.getSimil(sWord, sMorph, bSubst)) {
aSugg.add(e);
}
}
if (aSugg.size > 0) {
return Array.from(aSugg).join("|");
}
|
| ︙ | ︙ |
if (sWord[0] == "h" || sWord[0] == "H") {
return "ce|cet";
}
return "ce";
}
function suggLesLa (sWord) {
if (_oSpellChecker.getMorph(sWord).some(s => s.includes(":p"))) {
return "les|la";
}
return "la";
}
function formatNumber (s) {
let nLen = s.length;
|
| ︙ | ︙ |
Modified gc_lang/fr/modules-js/tests_data.json from [f05e835c66] to [ef6f6c1c40].
${regex_gctestsJS}
Modified gc_lang/fr/modules/gce_analyseur.py from [39975de0ac] to [50ac148025].
| ︙ | ︙ |
return "vous"
if s2 == "nous":
return "nous"
if s2 == "vous":
return "vous"
if s2 == "eux":
return "ils"
if s2 == "elle" or s2 == "elles":
if cr.mbNprMasNotFem(_oSpellChecker.getMorph(s1)):
return "ils"
# if epicene: indeterminable, but no matter, the feminine prevails
return "elles"
return s1 + " et " + s2
def apposition (sWord1, sWord2):
"returns True if nom + nom (no agreement required)"
return cr.mbNomNotAdj(_oSpellChecker.getMorph(sWord2)) and cr.mbPpasNomNotAdj(_oSpellChecker.getMorph(sWord1))
def isAmbiguousNAV (sWord):
"words which are nom|adj and verb are ambiguous (except être and avoir)"
lMorph = _oSpellChecker.getMorph(sWord)
if not cr.mbNomAdj(lMorph) or sWord == "est":
return False
if cr.mbVconj(lMorph) and not cr.mbMG(lMorph):
return True
return False
def isAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj):
"use it if sWord1 won’t be a verb; word2 is assumed to be True via isAmbiguousNAV"
a2 = _oSpellChecker.getMorph(sWord2)
if not a2:
return False
if cr.checkConjVerb(a2, sReqMorphConj):
# verb word2 is ok
return False
a1 = _oSpellChecker.getMorph(sWord1)
if not a1:
return False
if cr.checkAgreement(a1, a2) and (cr.mbAdj(a2) or cr.mbAdj(a1)):
return False
return True
def isVeryAmbiguousAndWrong (sWord1, sWord2, sReqMorphNA, sReqMorphConj, bLastHopeCond):
"use it if sWord1 can be also a verb; word2 is assumed to be True via isAmbiguousNAV"
a2 = _oSpellChecker.getMorph(sWord2)
if not a2:
return False
if cr.checkConjVerb(a2, sReqMorphConj):
# verb word2 is ok
return False
a1 = _oSpellChecker.getMorph(sWord1)
if not a1:
return False
if cr.checkAgreement(a1, a2) and (cr.mbAdj(a2) or cr.mbAdjNb(a1)):
return False
# now we know there is no agreement, and the conjugation is also wrong
if cr.isNomAdj(a1):
return True
#if cr.isNomAdjVerb(a1): # considered True
if bLastHopeCond:
return True
return False
def checkAgreement (sWord1, sWord2):
a2 = _oSpellChecker.getMorph(sWord2)
if not a2:
return True
a1 = _oSpellChecker.getMorph(sWord1)
if not a1:
return True
return cr.checkAgreement(a1, a2)
_zUnitSpecial = re.compile("[µ/⁰¹²³⁴⁵⁶⁷⁸⁹Ωℓ·]")
_zUnitNumbers = re.compile("[0-9]")
|
| ︙ | ︙ |
Modified gc_lang/fr/modules/gce_suggestions.py from [79835965e4] to [818aeb6977].
#### GRAMMAR CHECKING ENGINE PLUGIN: Suggestion mechanisms
from . import conj
from . import mfsp
from . import phonet
## Verbs
def suggVerb (sFlex, sWho, funcSugg2=None):
aSugg = set()
for sStem in _oSpellChecker.getLemma(sFlex):
tTags = conj._getTags(sStem)
if tTags:
# we get the tense
aTense = set()
for sMorph in _oSpellChecker.getMorph(sFlex):
for m in re.finditer(">"+sStem+" .*?(:(?:Y|I[pqsf]|S[pq]|K|P))", sMorph):
# stem must be used in regex to prevent confusion between different verbs (e.g. sauras has 2 stems: savoir and saurer)
if m:
if m.group(1) == ":Y":
aTense.add(":Ip")
aTense.add(":Iq")
aTense.add(":Is")
| ︙ | ︙ |
if aSugg:
return "|".join(aSugg)
return ""
def suggVerbPpas (sFlex, sWhat=None):
aSugg = set()
for sStem in _oSpellChecker.getLemma(sFlex):
tTags = conj._getTags(sStem)
if tTags:
if not sWhat:
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q1"))
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q2"))
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q3"))
aSugg.add(conj._getConjWithTags(sStem, tTags, ":PQ", ":Q4"))
| ︙ | ︙ |
if aSugg:
return "|".join(aSugg)
return ""
def suggVerbTense (sFlex, sTense, sWho):
aSugg = set()
for sStem in _oSpellChecker.getLemma(sFlex):
if conj.hasConj(sStem, sTense, sWho):
aSugg.add(conj.getConj(sStem, sTense, sWho))
if aSugg:
return "|".join(aSugg)
return ""
def suggVerbImpe (sFlex):
aSugg = set()
for sStem in _oSpellChecker.getLemma(sFlex):
tTags = conj._getTags(sStem)
if tTags:
if conj._hasConjWithTags(tTags, ":E", ":2s"):
aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2s"))
if conj._hasConjWithTags(tTags, ":E", ":1p"):
aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":1p"))
if conj._hasConjWithTags(tTags, ":E", ":2p"):
aSugg.add(conj._getConjWithTags(sStem, tTags, ":E", ":2p"))
if aSugg:
return "|".join(aSugg)
return ""
def suggVerbInfi (sFlex):
return "|".join([ sStem for sStem in _oSpellChecker.getLemma(sFlex) if conj.isVerb(sStem) ])
_dQuiEst = { "je": ":1s", "j’": ":1s", "j’en": ":1s", "j’y": ":1s", \
"tu": ":2s", "il": ":3s", "on": ":3s", "elle": ":3s", "nous": ":1p", "vous": ":2p", "ils": ":3p", "elles": ":3p" }
_lIndicatif = [":Ip", ":Iq", ":Is", ":If"]
_lSubjonctif = [":Sp", ":Sq"]
| ︙ | ︙ |
return ""
sWho = _dQuiEst.get(sSuj.lower(), None)
if not sWho:
if sSuj[0:1].islower(): # not a pronoun nor a proper noun
return ""
sWho = ":3s"
aSugg = set()
for sStem in _oSpellChecker.getLemma(sFlex):
tTags = conj._getTags(sStem)
if tTags:
for sTense in lMode:
if conj._hasConjWithTags(tTags, sTense, sWho):
aSugg.add(conj._getConjWithTags(sStem, tTags, sTense, sWho))
if aSugg:
return "|".join(aSugg)
return ""
## Nouns and adjectives
def suggPlur (sFlex, sWordToAgree=None):
"returns plural forms assuming sFlex is singular"
if sWordToAgree:
lMorph = _oSpellChecker.getMorph(sWordToAgree)
if not lMorph:
return ""
sGender = cr.getGender(lMorph)
if sGender == ":m":
return suggMasPlur(sFlex)
elif sGender == ":f":
return suggFemPlur(sFlex)
aSugg = set()
if "-" not in sFlex:
if sFlex.endswith("l"):
| ︙ | ︙ |
if aSugg:
return "|".join(aSugg)
return ""
def suggMasSing (sFlex, bSuggSimil=False):
"returns masculine singular forms"
aSugg = set()
for sMorph in _oSpellChecker.getMorph(sFlex):
if not ":V" in sMorph:
# not a verb
if ":m" in sMorph or ":e" in sMorph:
aSugg.add(suggSing(sFlex))
else:
sStem = cr.getLemmaOfMorph(sMorph)
if mfsp.isFemForm(sStem):
| ︙ | ︙ |
if aSugg:
return "|".join(aSugg)
return ""
def suggMasPlur (sFlex, bSuggSimil=False):
"returns masculine plural forms"
aSugg = set()
for sMorph in _oSpellChecker.getMorph(sFlex):
if not ":V" in sMorph:
# not a verb
if ":m" in sMorph or ":e" in sMorph:
aSugg.add(suggPlur(sFlex))
else:
sStem = cr.getLemmaOfMorph(sMorph)
if mfsp.isFemForm(sStem):
| ︙ | ︙ |
if aSugg:
return "|".join(aSugg)
return ""
def suggFemSing (sFlex, bSuggSimil=False):
"returns feminine singular forms"
aSugg = set()
for sMorph in _oSpellChecker.getMorph(sFlex):
if not ":V" in sMorph:
# not a verb
if ":f" in sMorph or ":e" in sMorph:
aSugg.add(suggSing(sFlex))
else:
sStem = cr.getLemmaOfMorph(sMorph)
if mfsp.isFemForm(sStem):
| ︙ | ︙ |
if aSugg:
return "|".join(aSugg)
return ""
def suggFemPlur (sFlex, bSuggSimil=False):
"returns feminine plural forms"
aSugg = set()
for sMorph in _oSpellChecker.getMorph(sFlex):
if not ":V" in sMorph:
# not a verb
if ":f" in sMorph or ":e" in sMorph:
aSugg.add(suggPlur(sFlex))
else:
sStem = cr.getLemmaOfMorph(sMorph)
if mfsp.isFemForm(sStem):
| ︙ | ︙ |
aSugg.add(e)
if aSugg:
return "|".join(aSugg)
return ""
def hasFemForm (sFlex):
for sStem in _oSpellChecker.getLemma(sFlex):
if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q3"):
return True
if phonet.hasSimil(sFlex, ":f"):
return True
return False
def hasMasForm (sFlex):
for sStem in _oSpellChecker.getLemma(sFlex):
if mfsp.isFemForm(sStem) or conj.hasConj(sStem, ":PQ", ":Q1"):
# what has a feminine form also has a masculine form
return True
if phonet.hasSimil(sFlex, ":m"):
return True
return False
def switchGender (sFlex, bPlur=None):
aSugg = set()
if bPlur is None:
for sMorph in _oSpellChecker.getMorph(sFlex):
if ":f" in sMorph:
if ":s" in sMorph:
aSugg.add(suggMasSing(sFlex))
elif ":p" in sMorph:
aSugg.add(suggMasPlur(sFlex))
elif ":m" in sMorph:
if ":s" in sMorph:
aSugg.add(suggFemSing(sFlex))
elif ":p" in sMorph:
aSugg.add(suggFemPlur(sFlex))
else:
aSugg.add(suggFemSing(sFlex))
aSugg.add(suggFemPlur(sFlex))
elif bPlur:
for sMorph in _oSpellChecker.getMorph(sFlex):
if ":f" in sMorph:
aSugg.add(suggMasPlur(sFlex))
elif ":m" in sMorph:
aSugg.add(suggFemPlur(sFlex))
else:
for sMorph in _oSpellChecker.getMorph(sFlex):
if ":f" in sMorph:
aSugg.add(suggMasSing(sFlex))
elif ":m" in sMorph:
aSugg.add(suggFemSing(sFlex))
if aSugg:
return "|".join(aSugg)
return ""
def switchPlural (sFlex):
aSugg = set()
for sMorph in _oSpellChecker.getMorph(sFlex):
if ":s" in sMorph:
aSugg.add(suggPlur(sFlex))
elif ":p" in sMorph:
aSugg.add(suggSing(sFlex))
if aSugg:
return "|".join(aSugg)
return ""
def hasSimil (sWord, sPattern=None):
return phonet.hasSimil(sWord, sPattern)
def suggSimil (sWord, sPattern=None, bSubst=False):
"return list of words phonetically similar to sWord and whom POS is matching sPattern"
aSugg = phonet.selectSimil(sWord, sPattern)
for sMorph in _oSpellChecker.getMorph(sWord):
aSugg.update(conj.getSimil(sWord, sMorph, bSubst))
break
if aSugg:
return "|".join(aSugg)
return ""
def suggCeOrCet (sWord):
if re.match("(?i)[aeéèêiouyâîï]", sWord):
return "cet"
if sWord[0:1] == "h" or sWord[0:1] == "H":
return "ce|cet"
return "ce"
def suggLesLa (sWord):
if any( ":p" in sMorph for sMorph in _oSpellChecker.getMorph(sWord) ):
return "les|la"
return "la"
_zBinary = re.compile("^[01]+$")
def formatNumber (s):
|
| ︙ | ︙ |
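All the suggestion helpers above share one contract: candidates are accumulated in a set (which deduplicates them), then joined with "|" for the engine, with an empty string meaning "no suggestion". A minimal sketch of that contract (hypothetical standalone code; the real functions also rely on the engine-injected _oSpellChecker and the conj/mfsp/phonet modules):

    def _formatSugg (aSugg):
        # mirrors the return pattern of suggVerb, suggPlur, switchGender, etc.
        return "|".join(aSugg) if aSugg else ""

    print(_formatSugg({"mangera"}))   # "mangera"
    print(_formatSugg(set()))         # ""  → no suggestion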
Added gc_lang/fr/rules_graph.grx version [18deb74635].
#
# FRENCH GRAMMAR RULES FOR GRAMMALECTE
# by Olivier R.
#
# Copyright © 2011-2017.
#
# This file is part of Grammalecte.
#
# Grammalecte is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Grammalecte is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Grammalecte. If not, see <http://www.gnu.org/licenses/>
#
# RULES FOR THE TOKEN GRAPH
# DOCUMENTATION
# Regular expressions in Python: http://docs.python.org/library/re.html
# [++]: separator between the paragraph rules and the sentence rules.
# Action types:
#   ->> error
#   ~>> text preprocessor
#   =>> disambiguator
# Interpretation of the file ends at a line beginning with #END
# COMMON ERRORS
# http://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Fautes_d%27orthographe/Courantes
__rule1__
les ~:N:.:s
des ~:N:.:s
ces ~:N:.:s
<<- -1>> acquit # Message0|http://test.grammalecte.net
__rule2__
ci important que soi
ci vraiment il y a
ci pour ça
<<- morph(\2, ":[WAR]", False) -1>> si # Message1|http://test.grammalecte.net
__rule3__
contre ([nature|pétrie|action]) par ([ennui|sélection])
<<- morph(\1, "xxxx") -1:2>> =\1+\2 # Message2|http://test.grammalecte.org
<<- ~1>> hyper|fonction
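For orientation: a bracketed group such as [ennui|sélection] stands for alternative tokens, so one rule line encodes several token paths through the graph. A rough sketch of that expansion, under the assumption that brackets mean plain alternation and that the grouping parentheses can be ignored (the actual logic lives in compile_rules_graph.py):

    import itertools

    def expandTokenLine (sTokenLine):
        "yield every token sequence encoded by [a|b] alternatives (grouping parentheses ignored)"
        lChoices = []
        for sTok in sTokenLine.replace("(", "").replace(")", "").split():
            if sTok.startswith("[") and sTok.endswith("]"):
                lChoices.append(sTok[1:-1].split("|"))
            else:
                lChoices.append([sTok])
        yield from itertools.product(*lChoices)

    for tPath in expandTokenLine("contre ([nature|pétrie|action]) par ([ennui|sélection])"):
        print(" ".join(tPath))    # 6 distinct token paths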
Modified graphspell-js/spellchecker.js from [3df103d578] to [a6bdb52bd3].
| ︙ | ︙ |
this.oExtendedDic = this._loadDictionary(extentedDic, sPath);
this.oCommunityDic = this._loadDictionary(communityDic, sPath);
this.oPersonalDic = this._loadDictionary(personalDic, sPath);
this.bExtendedDic = Boolean(this.oExtendedDic);
this.bCommunityDic = Boolean(this.oCommunityDic);
this.bPersonalDic = Boolean(this.oPersonalDic);
this.oTokenizer = null;
// storage
this.bStorage = false;
this._dMorphologies = new Map(); // key: flexion, value: list of morphologies
this._dLemmas = new Map(); // key: flexion, value: list of lemmas
}
_loadDictionary (dictionary, sPath="", bNecessary=false) {
// returns an IBDAWG object
if (!dictionary) {
return null;
}
| ︙ | ︙ |
this.bCommunityDic = false;
}
deactivatePersonalDictionary () {
this.bPersonalDic = false;
}
// Storage
activateStorage () {
this.bStorage = true;
}
deactivateStorage () {
this.bStorage = false;
}
clearStorage () {
this._dLemmas.clear();
this._dMorphologies.clear();
}
// parse text functions
parseParagraph (sText) {
if (!this.oTokenizer) {
this.loadTokenizer();
}
| ︙ | ︙ |
return true;
}
return false;
}
getMorph (sWord) {
// retrieves morphologies list, different casing allowed
if (this.bStorage && this._dMorphologies.has(sWord)) {
return this._dMorphologies.get(sWord);
}
let lMorph = this.oMainDic.getMorph(sWord);
if (this.bExtendedDic) {
lMorph.push(...this.oExtendedDic.getMorph(sWord));
}
if (this.bCommunityDic) {
lMorph.push(...this.oCommunityDic.getMorph(sWord));
}
if (this.bPersonalDic) {
lMorph.push(...this.oPersonalDic.getMorph(sWord));
}
if (this.bStorage) {
this._dMorphologies.set(sWord, lMorph);
this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf(" ")); }))));
//console.log(sWord, this._dLemmas.get(sWord));
}
return lMorph;
}
getLemma (sWord) {
// retrieves lemmas
if (this.bStorage) {
if (!this._dLemmas.has(sWord)) {
this.getMorph(sWord);
}
return this._dLemmas.get(sWord);
}
return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf(" ")); })));
}
* suggest (sWord, nSuggLimit=10) {
// generator: returns 1, 2 or 3 lists of suggestions
yield this.oMainDic.suggest(sWord, nSuggLimit);
if (this.bExtendedDic) {
yield this.oExtendedDic.suggest(sWord, nSuggLimit);
| ︙ | ︙ |
Modified graphspell-js/tokenizer.js from [bdd895b918] to [9bd60cca8a].
| ︙ | ︙ |
const aTkzPatterns = {
// All regexps must start with ^.
"default":
[
[/^[ \t]+/, 'SPACE'],
[/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERUNIX'],
[/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
[/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]/, 'SEPARATOR'],
[/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
[/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^\d\d?h\d\d\b/, 'HOUR'],
[/^-?\d+(?:[.,]\d+|)/, 'NUM'],
[/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
],
"fr":
[
[/^[ \t]+/, 'SPACE'],
[/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERUNIX'],
[/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.()-]+)*/, 'FOLDERWIN'],
[/^[,.;:!?…«»“”‘’"(){}\[\]/·–—]/, 'SEPARATOR'],
[/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
[/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
[/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
[/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
[/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
[/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
[/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
| ︙ | ︙ |
this.sLang = "default";
}
this.aRules = aTkzPatterns[this.sLang];
}
* genTokens (sText) {
let m;
let iNext = 0;
while (sText) {
let iCut = 1;
let iToken = 0;
for (let [zRegex, sType] of this.aRules) {
try {
if ((m = zRegex.exec(sText)) !== null) {
iToken += 1;
yield { "i": iToken, "sType": sType, "sValue": m[0], "nStart": iNext, "nEnd": iNext + m[0].length }
iCut = m[0].length;
break;
}
}
catch (e) {
helpers.logerror(e);
}
}
iNext += iCut;
sText = sText.slice(iCut);
}
}
}
if (typeof(exports) !== 'undefined') {
exports.Tokenizer = Tokenizer;
}
Modified graphspell/spellchecker.py from [cbd22d2c4d] to [2c7f3d8dbe].
| ︙ | ︙ |
self.oExtendedDic = self._loadDictionary(sfExtendedDic)
self.oCommunityDic = self._loadDictionary(sfCommunityDic)
self.oPersonalDic = self._loadDictionary(sfPersonalDic)
self.bExtendedDic = bool(self.oExtendedDic)
self.bCommunityDic = bool(self.oCommunityDic)
self.bPersonalDic = bool(self.oPersonalDic)
self.oTokenizer = None
# storage
self.bStorage = False
self._dMorphologies = {} # key: flexion, value: list of morphologies
self._dLemmas = {} # key: flexion, value: list of lemmas
def _loadDictionary (self, source, bNecessary=False):
"returns an IBDAWG object"
if not source:
return None
try:
return ibdawg.IBDAWG(source)
| ︙ | ︙ |
def deactivateCommunityDictionary (self):
self.bCommunityDic = False
def deactivatePersonalDictionary (self):
self.bPersonalDic = False
# Storage
def activateStorage (self):
self.bStorage = True
def deactivateStorage (self):
self.bStorage = False
def clearStorage (self):
self._dLemmas.clear()
self._dMorphologies.clear()
# parse text functions
def parseParagraph (self, sText, bSpellSugg=False):
if not self.oTokenizer:
self.loadTokenizer()
aSpellErrs = []
| ︙ | ︙ |
return True
if self.bPersonalDic and self.oPersonalDic.lookup(sWord):
return True
return False
def getMorph (self, sWord):
"retrieves morphologies list, different casing allowed"
if self.bStorage and sWord in self._dMorphologies:
return self._dMorphologies[sWord]
lMorph = self.oMainDic.getMorph(sWord)
if self.bExtendedDic:
lMorph.extend(self.oExtendedDic.getMorph(sWord))
if self.bCommunityDic:
lMorph.extend(self.oCommunityDic.getMorph(sWord))
if self.bPersonalDic:
lMorph.extend(self.oPersonalDic.getMorph(sWord))
if self.bStorage:
self._dMorphologies[sWord] = lMorph
self._dLemmas[sWord] = set([ s[1:s.find(" ")] for s in lMorph ])
return lMorph
def getLemma (self, sWord):
"retrieves lemmas"
if self.bStorage:
if sWord not in self._dLemmas:
self.getMorph(sWord)
return self._dLemmas[sWord]
return set([ s[1:s.find(" ")] for s in self.getMorph(sWord) ])
def suggest (self, sWord, nSuggLimit=10):
"generator: returns 1, 2 or 3 lists of suggestions"
yield self.oMainDic.suggest(sWord, nSuggLimit)
if self.bExtendedDic:
yield self.oExtendedDic.suggest(sWord, nSuggLimit)
| ︙ | ︙ |
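A hedged usage sketch of the new storage layer (it assumes a built French dictionary is reachable by the SpellChecker constructor; the method names are the ones added above):

    from graphspell.spellchecker import SpellChecker

    oSpellChecker = SpellChecker("fr")
    oSpellChecker.activateStorage()             # memoize morphology lookups
    lMorph = oSpellChecker.getMorph("chante")   # computed once, then cached
    aLemma = oSpellChecker.getLemma("chante")   # served from the same cache
    oSpellChecker.clearStorage()                # reset the caches between texts if needed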
Modified graphspell/tokenizer.py from [17f452887e] to [b3cbfe75ea].
# Very simple tokenizer
import re
_PATTERNS = {
"default":
(
r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}/·–—])',
r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r'(?P<HOUR>\d\d?h\d\d\b)',
r'(?P<NUM>-?\d+(?:[.,]\d+))',
r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
),
"fr":
(
r'(?P<FOLDERUNIX>/(?:bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:/[\w.()-]+)*)',
r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}/·–—])',
r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
r'(?P<HASHTAG>[#@][\w-]+)',
r'(?P<HTML><\w+.*?>|</\w+ *>)',
r'(?P<PSEUDOHTML>\[/?\w+\])',
r"(?P<ELPFX>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
r'(?P<ORDINAL>\d+(?:er|nd|e|de|ième|ème|eme)\b)',
| ︙ | ︙ |
def __init__ (self, sLang):
self.sLang = sLang
if sLang not in _PATTERNS:
self.sLang = "default"
self.zToken = re.compile( "(?i)" + '|'.join(sRegex for sRegex in _PATTERNS[sLang]) )
def genTokens (self, sText):
for i, m in enumerate(self.zToken.finditer(sText), 1):
yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
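A quick sketch of the rewritten generator above: enumerate() numbers the tokens from 1, and each yielded dict carries the fields shown (i, sType, sValue, nStart, nEnd).

    from graphspell.tokenizer import Tokenizer

    oTokenizer = Tokenizer("fr")
    for dToken in oTokenizer.genTokens("C’est l’hiver."):
        print(dToken["i"], dToken["sType"], dToken["sValue"], dToken["nStart"], dToken["nEnd"])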
Modified make.py from [14e0172bf2] to [b6664e27ed].
| ︙ | ︙ |
import json
import platform
from distutils import dir_util, file_util

import dialog_bundled
import compile_rules
import compile_rules_graph
import helpers
import lex_build

sWarningMessage = "The content of this folder is generated by code and replaced at each build.\n"
| ︙ | ︙ |
spLang = "gc_lang/" + sLang
dVars = xConfig._sections['args']
dVars['locales'] = dVars["locales"].replace("_", "-")
dVars['loc'] = str(dict([ [s, [s[0:2], s[3:5], ""]] for s in dVars["locales"].split(" ") ]))
## COMPILE RULES
dResultRegex = compile_rules.make(spLang, dVars['lang'], bJavaScript)
dVars.update(dResultRegex)
dResultGraph = compile_rules_graph.make(spLang, dVars['lang'], bJavaScript)
dVars.update(dResultGraph)
## READ GRAMMAR CHECKER PLUGINS
print("PYTHON:")
print("+ Plugins: ", end="")
sCodePlugins = ""
for sf in os.listdir(spLang+"/modules"):
if re.match(r"gce_\w+[.]py$", sf):
| ︙ | ︙ |
helpers.copyAndFileTemplate(spLang+"/modules/"+sf, spLangPack+"/"+sf, dVars)
print(sf, end=", ")
print()
# TEST FILES
with open("grammalecte/"+sLang+"/gc_test.txt", "w", encoding="utf-8", newline="\n") as hDstPy:
hDstPy.write("# TESTS FOR LANG [" + sLang + "]\n\n")
hDstPy.write(dVars['regex_gctests'])
hDstPy.write(dVars['graph_gctests'])
createOXT(spLang, dVars, xConfig._sections['oxt'], spLangPack, bInstallOXT)
createServerOptions(sLang, dVars)
createPackageZip(sLang, dVars, spLangPack)
#### JAVASCRIPT
| ︙ | ︙ |