Mercurial
comparison .cms/lib/codemirror/src/util/bidi.js @ 0:78edf6b517a0 draft
24.10
author | Coffee CMS <info@coffee-cms.ru> |
---|---|
date | Fri, 11 Oct 2024 22:40:23 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:78edf6b517a0 |
---|---|
1 import { lst } from "./misc.js" | |
2 | |
3 // BIDI HELPERS | |
4 | |
// Call `f(start, end, direction, index)` once for every section of
// `order` that intersects the range from `from` to `to`, with the
// section's bounds clipped to that range. `direction` is "rtl" for
// level-1 sections and "ltr" for all others. When `order` is falsy the
// whole range is reported as a single "ltr" section with index 0 and
// the result of `f` is returned. When no section intersects the range,
// `f(from, to, "ltr")` is called once, without an index.
export function iterateBidiSections(order, from, to, f) {
  if (!order) return f(from, to, "ltr", 0)
  let hits = 0
  for (let index = 0; index < order.length; ++index) {
    const span = order[index]
    const overlaps = span.from < to && span.to > from
    // An empty range still matches a section that ends exactly on it.
    const touches = overlaps || (from == to && span.to == from)
    if (!touches) continue
    const start = Math.max(span.from, from)
    const end = Math.min(span.to, to)
    const dir = span.level == 1 ? "rtl" : "ltr"
    f(start, end, dir, index)
    ++hits
  }
  if (hits == 0) f(from, to, "ltr")
}
17 | |
// Index of the "other" candidate section seen by the most recent
// getBidiPartAt call (a section touching `ch` that was not selected by
// the stickiness rule), or null when there was none.
export let bidiOther = null

// Find the index of the section in `order` that position `ch` belongs
// to. A section strictly containing `ch` wins immediately. At a
// boundary, `sticky == "before"` prefers the non-empty section ending
// at `ch`, anything else prefers the non-empty section starting at
// `ch`; boundary sections that are not preferred are recorded in
// `bidiOther`. Returns the preferred index, falling back to
// `bidiOther` (which may be null).
export function getBidiPartAt(order, ch, sticky) {
  const stickBefore = sticky == "before"
  let match
  bidiOther = null
  for (let index = 0; index < order.length; ++index) {
    const span = order[index]
    if (span.from < ch && span.to > ch) return index
    const nonEmpty = span.from != span.to
    if (span.to == ch) {
      if (nonEmpty && stickBefore) match = index
      else bidiOther = index
    }
    if (span.from == ch) {
      if (nonEmpty && !stickBefore) match = index
      else bidiOther = index
    }
  }
  if (match != null) return match
  return bidiOther
}
36 | |
37 // Bidirectional ordering algorithm | |
38 // See http://unicode.org/reports/tr9/tr9-13.html for the algorithm | |
39 // that this (partially) implements. | |
40 | |
41 // One-char codes used for character types: | |
42 // L (L): Left-to-Right | |
43 // R (R): Right-to-Left | |
44 // r (AL): Right-to-Left Arabic | |
45 // 1 (EN): European Number | |
46 // + (ES): European Number Separator | |
47 // % (ET): European Number Terminator | |
48 // n (AN): Arabic Number | |
49 // , (CS): Common Number Separator | |
50 // m (NSM): Non-Spacing Mark | |
51 // b (BN): Boundary Neutral | |
52 // s (B): Paragraph Separator | |
53 // t (S): Segment Separator | |
54 // w (WS): Whitespace | |
55 // N (ON): Other Neutrals | |
56 | |
// Compute the visual ordering of a line of text.
//
// Takes the line's string and its base direction ("ltr" or "rtl").
// Returns false when the string is empty, or when the direction is
// "ltr" and the string contains no right-to-left characters (i.e. the
// characters are ordered as they appear). Otherwise returns an array
// of sections ({from, to, level} objects) in the order in which they
// occur visually.
let bidiOrdering = (function() {
  // Character types for codepoints 0 to 0xff
  let lowTypes = "bbbbbbbbbtstwsbbbbbbbbbbbbbbssstwNN%%%NNNNNN,N,N1111111111NNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNbbbbbbsbbbbbbbbbbbbbbbbbbbbbbbbbb,N%%%%NNNNLNNNNN%%11NLNNN1LNNNNNLLLLLLLLLLLLLLLLLLLLLLLNLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLN"
  // Character types for codepoints 0x600 to 0x6f9
  let arabicTypes = "nnnnnnNNr%%r,rNNmmmmmmmmmmmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnn%nnrrrmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmnNmmmmmmrrmmNmmmmrr1111111111"

  // Map a UTF-16 code unit to its one-char type code. Anything not
  // covered by the tables or ranges below is treated as "L".
  function charType(code) {
    if (code <= 0xf7) return lowTypes.charAt(code)
    else if (0x590 <= code && code <= 0x5f4) return "R"
    else if (0x600 <= code && code <= 0x6f9) return arabicTypes.charAt(code - 0x600)
    else if (0x6ee <= code && code <= 0x8ac) return "r"
    else if (0x2000 <= code && code <= 0x200b) return "w"
    else if (code == 0x200c) return "b"
    else return "L"
  }

  // Matches any character from the right-to-left ranges handled above.
  let bidiRE = /[\u0590-\u05f4\u0600-\u06ff\u0700-\u08ac]/
  let isNeutral = /[stwN]/, isStrong = /[LRr]/, countsAsLeft = /[Lb1n]/, countsAsNum = /[1n]/

  // A run of text [from, to) with a single level (0, 1 or 2).
  function BidiSpan(level, from, to) {
    this.level = level
    this.from = from; this.to = to
  }

  return function(str, direction) {
    // Stands in for sor/eor: the type assumed outside the string.
    let outerType = direction == "ltr" ? "L" : "R"

    if (str.length == 0 || direction == "ltr" && !bidiRE.test(str)) return false
    let len = str.length, types = []
    for (let i = 0; i < len; ++i)
      types.push(charType(str.charCodeAt(i)))

    // W1. Examine each non-spacing mark (NSM) in the level run, and
    // change the type of the NSM to the type of the previous
    // character. If the NSM is at the start of the level run, it will
    // get the type of sor.
    for (let i = 0, prev = outerType; i < len; ++i) {
      let type = types[i]
      if (type == "m") types[i] = prev
      else prev = type
    }

    // W2. Search backwards from each instance of a European number
    // until the first strong type (R, L, AL, or sor) is found. If an
    // AL is found, change the type of the European number to Arabic
    // number.
    // W3. Change all ALs to R.
    for (let i = 0, cur = outerType; i < len; ++i) {
      let type = types[i]
      if (type == "1" && cur == "r") types[i] = "n"
      else if (isStrong.test(type)) { cur = type; if (type == "r") types[i] = "R" }
    }

    // W4. A single European separator between two European numbers
    // changes to a European number. A single common separator between
    // two numbers of the same type changes to that type.
    for (let i = 1, prev = types[0]; i < len - 1; ++i) {
      let type = types[i]
      if (type == "+" && prev == "1" && types[i+1] == "1") types[i] = "1"
      else if (type == "," && prev == types[i+1] &&
               (prev == "1" || prev == "n")) types[i] = prev
      prev = type
    }

    // W5. A sequence of European terminators adjacent to European
    // numbers changes to all European numbers.
    // W6. Otherwise, separators and terminators change to Other
    // Neutral.
    for (let i = 0; i < len; ++i) {
      let type = types[i]
      if (type == ",") types[i] = "N"
      else if (type == "%") {
        let end
        for (end = i + 1; end < len && types[end] == "%"; ++end) {}
        // The run is adjacent to a European number when the type just
        // before it or just after it is "1". (The preceding type was
        // previously compared against "!", which is not a type code,
        // so terminators following a number were never promoted.)
        let replace = (i && types[i-1] == "1") || (end < len && types[end] == "1") ? "1" : "N"
        for (let j = i; j < end; ++j) types[j] = replace
        i = end - 1
      }
    }

    // W7. Search backwards from each instance of a European number
    // until the first strong type (R, L, or sor) is found. If an L is
    // found, then change the type of the European number to L.
    for (let i = 0, cur = outerType; i < len; ++i) {
      let type = types[i]
      if (cur == "L" && type == "1") types[i] = "L"
      else if (isStrong.test(type)) cur = type
    }

    // N1. A sequence of neutrals takes the direction of the
    // surrounding strong text if the text on both sides has the same
    // direction. European and Arabic numbers act as if they were R in
    // terms of their influence on neutrals. Start-of-level-run (sor)
    // and end-of-level-run (eor) are used at level run boundaries.
    // N2. Any remaining neutrals take the embedding direction.
    for (let i = 0; i < len; ++i) {
      if (isNeutral.test(types[i])) {
        let end
        for (end = i + 1; end < len && isNeutral.test(types[end]); ++end) {}
        let before = (i ? types[i-1] : outerType) == "L"
        let after = (end < len ? types[end] : outerType) == "L"
        let replace = before == after ? (before ? "L" : "R") : outerType
        for (let j = i; j < end; ++j) types[j] = replace
        i = end - 1
      }
    }

    // Here we depart from the documented algorithm, in order to avoid
    // building up an actual levels array. Since there are only three
    // levels (0, 1, 2) in an implementation that doesn't take
    // explicit embedding into account, we can build up the order on
    // the fly, without following the level-based algorithm.
    let order = [], m
    for (let i = 0; i < len;) {
      if (countsAsLeft.test(types[i])) {
        let start = i
        for (++i; i < len && countsAsLeft.test(types[i]); ++i) {}
        order.push(new BidiSpan(0, start, i))
      } else {
        // A stretch of non-"L" types: split it into level-1 spans and
        // level-2 number spans. In "ltr" mode every span is inserted
        // at the same index `at`, so later spans land before earlier
        // ones; in "rtl" mode `at` advances, keeping logical order
        // (the whole array is reversed at the end).
        let pos = i, at = order.length, isRTL = direction == "rtl" ? 1 : 0
        for (++i; i < len && types[i] != "L"; ++i) {}
        for (let j = pos; j < i;) {
          if (countsAsNum.test(types[j])) {
            if (pos < j) { order.splice(at, 0, new BidiSpan(1, pos, j)); at += isRTL }
            let nstart = j
            for (++j; j < i && countsAsNum.test(types[j]); ++j) {}
            order.splice(at, 0, new BidiSpan(2, nstart, j))
            at += isRTL
            pos = j
          } else ++j
        }
        if (pos < i) order.splice(at, 0, new BidiSpan(1, pos, i))
      }
    }
    if (direction == "ltr") {
      // Split leading/trailing whitespace off a level-1 span at either
      // edge of the order into its own level-0 span.
      if (order[0].level == 1 && (m = str.match(/^\s+/))) {
        order[0].from = m[0].length
        order.unshift(new BidiSpan(0, 0, m[0].length))
      }
      if (lst(order).level == 1 && (m = str.match(/\s+$/))) {
        lst(order).to -= m[0].length
        order.push(new BidiSpan(0, len - m[0].length, len))
      }
    }

    return direction == "rtl" ? order.reverse() : order
  }
})()
207 | |
// Look up the bidi ordering for `line`, computing it from `line.text`
// and `direction` on first use and caching it on `line.order`. The
// result is false for lines that are fully left-to-right, and an array
// of BidiSpan objects otherwise.
export function getOrder(line, direction) {
  const cached = line.order
  if (cached != null) return cached
  const computed = bidiOrdering(line.text, direction)
  line.order = computed
  return computed
}