0
|
1 import { lst } from "./misc.js"
|
|
2
|
|
3 // BIDI HELPERS
|
|
4
|
|
// Invoke f(start, end, dir, index) for every section of `order` that
// intersects the range from..to. `start`/`end` are clipped to the
// range and `dir` is "rtl" for level-1 sections, "ltr" for all others.
// Without an order the whole range is reported once as "ltr" with
// index 0 and f's result is returned. When no section matches, f is
// called once with (from, to, "ltr") and no index.
export function iterateBidiSections(order, from, to, f) {
  if (!order) return f(from, to, "ltr", 0)
  let matched = 0
  for (let idx = 0; idx < order.length; idx++) {
    const span = order[idx]
    const overlaps = span.from < to && span.to > from
    // A collapsed range also matches the section that ends right at it
    const endsAtCollapsedRange = from == to && span.to == from
    if (!overlaps && !endsAtCollapsedRange) continue
    const dir = span.level == 1 ? "rtl" : "ltr"
    f(Math.max(span.from, from), Math.min(span.to, to), dir, idx)
    matched++
  }
  if (matched === 0) f(from, to, "ltr")
}
|
|
17
|
|
// Index of the "other" section touching the position most recently
// passed to getBidiPartAt, or null when there is none. Reset on
// every call to getBidiPartAt.
export let bidiOther = null

// Find the index of the section in `order` that position `ch` belongs
// to. A section strictly containing `ch` wins immediately. At a
// boundary, a non-empty section ending at `ch` is preferred when
// `sticky` is "before", and a non-empty section starting at `ch` is
// preferred otherwise; any other section touching `ch` is recorded in
// `bidiOther`. Returns the preferred index, falling back to
// `bidiOther` (which may be null).
export function getBidiPartAt(order, ch, sticky) {
  const stickBefore = sticky == "before"
  let primary = null
  bidiOther = null
  for (let idx = 0; idx < order.length; idx++) {
    const { from: start, to: end } = order[idx]
    if (start < ch && ch < end) return idx
    const nonEmpty = start != end
    if (end == ch) {
      if (nonEmpty && stickBefore) primary = idx
      else bidiOther = idx
    }
    if (start == ch) {
      if (nonEmpty && !stickBefore) primary = idx
      else bidiOther = idx
    }
  }
  return primary != null ? primary : bidiOther
}
|
|
36
|
|
37 // Bidirectional ordering algorithm
|
|
38 // See http://unicode.org/reports/tr9/tr9-13.html for the algorithm
|
|
39 // that this (partially) implements.
|
|
40
|
|
41 // One-char codes used for character types:
|
|
42 // L (L): Left-to-Right
|
|
43 // R (R): Right-to-Left
|
|
44 // r (AL): Right-to-Left Arabic
|
|
45 // 1 (EN): European Number
|
|
46 // + (ES): European Number Separator
|
|
47 // % (ET): European Number Terminator
|
|
48 // n (AN): Arabic Number
|
|
49 // , (CS): Common Number Separator
|
|
50 // m (NSM): Non-Spacing Mark
|
|
51 // b (BN): Boundary Neutral
|
|
52 // s (B): Paragraph Separator
|
|
53 // t (S): Segment Separator
|
|
54 // w (WS): Whitespace
|
|
55 // N (ON): Other Neutrals
|
|
56
|
|
// Returns false if characters are ordered as they appear
// (left-to-right), or an array of sections ({from, to, level}
// objects) in the order in which they occur visually.
let bidiOrdering = (function() {
  // Character types for codepoints 0 to 0xf7 (the range for which
  // charType consults this table)
  let lowTypes = "bbbbbbbbbtstwsbbbbbbbbbbbbbbssstwNN%%%NNNNNN,N,N1111111111NNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNbbbbbbsbbbbbbbbbbbbbbbbbbbbbbbbbb,N%%%%NNNNLNNNNN%%11NLNNN1LNNNNNLLLLLLLLLLLLLLLLLLLLLLLNLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLN"
  // Character types for codepoints 0x600 to 0x6f9
  let arabicTypes = "nnnnnnNNr%%r,rNNmmmmmmmmmmmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnn%nnrrrmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmnNmmmmmmrrmmNmmmmrr1111111111"

  // Map a UTF-16 code unit to its one-char type code. Anything not
  // covered by the tables or ranges below is treated as "L".
  function charType(code) {
    if (code <= 0xf7) return lowTypes.charAt(code)
    else if (0x590 <= code && code <= 0x5f4) return "R"
    else if (0x600 <= code && code <= 0x6f9) return arabicTypes.charAt(code - 0x600)
    else if (0x6ee <= code && code <= 0x8ac) return "r"
    else if (0x2000 <= code && code <= 0x200b) return "w"
    else if (code == 0x200c) return "b"
    else return "L"
  }

  // Matches any character that can make a line non-trivially ordered
  let bidiRE = /[\u0590-\u05f4\u0600-\u06ff\u0700-\u08ac]/
  let isNeutral = /[stwN]/, isStrong = /[LRr]/, countsAsLeft = /[Lb1n]/, countsAsNum = /[1n]/

  // A section of the line: characters from..to, at the given level
  // (0, 1 or 2).
  function BidiSpan(level, from, to) {
    this.level = level
    this.from = from; this.to = to
  }

  // str is the line text; direction is the base direction ("ltr"
  // selects an outer type of L, anything else an outer type of R).
  // Returns false for the empty string and for "ltr" text without any
  // character matched by bidiRE; otherwise an array of BidiSpan
  // objects.
  return function(str, direction) {
    let outerType = direction == "ltr" ? "L" : "R"

    if (str.length == 0 || direction == "ltr" && !bidiRE.test(str)) return false
    let len = str.length, types = []
    for (let i = 0; i < len; ++i)
      types.push(charType(str.charCodeAt(i)))

    // W1. Examine each non-spacing mark (NSM) in the level run, and
    // change the type of the NSM to the type of the previous
    // character. If the NSM is at the start of the level run, it will
    // get the type of sor.
    for (let i = 0, prev = outerType; i < len; ++i) {
      let type = types[i]
      if (type == "m") types[i] = prev
      else prev = type
    }

    // W2. Search backwards from each instance of a European number
    // until the first strong type (R, L, AL, or sor) is found. If an
    // AL is found, change the type of the European number to Arabic
    // number.
    // W3. Change all ALs to R.
    for (let i = 0, cur = outerType; i < len; ++i) {
      let type = types[i]
      if (type == "1" && cur == "r") types[i] = "n"
      else if (isStrong.test(type)) { cur = type; if (type == "r") types[i] = "R" }
    }

    // W4. A single European separator between two European numbers
    // changes to a European number. A single common separator between
    // two numbers of the same type changes to that type.
    for (let i = 1, prev = types[0]; i < len - 1; ++i) {
      let type = types[i]
      if (type == "+" && prev == "1" && types[i+1] == "1") types[i] = "1"
      else if (type == "," && prev == types[i+1] &&
               (prev == "1" || prev == "n")) types[i] = prev
      // Track the unmodified type, so that only a *single* separator
      // between two numbers is converted
      prev = type
    }

    // W5. A sequence of European terminators adjacent to European
    // numbers changes to all European numbers.
    // W6. Otherwise, separators and terminators change to Other
    // Neutral.
    for (let i = 0; i < len; ++i) {
      let type = types[i]
      if (type == ",") types[i] = "N"
      else if (type == "%") {
        let end
        for (end = i + 1; end < len && types[end] == "%"; ++end) {}
        // The run is adjacent to a European number when the character
        // right before it or right after it has type "1"
        let replace = (i && types[i-1] == "1") || (end < len && types[end] == "1") ? "1" : "N"
        for (let j = i; j < end; ++j) types[j] = replace
        i = end - 1
      }
    }

    // W7. Search backwards from each instance of a European number
    // until the first strong type (R, L, or sor) is found. If an L is
    // found, then change the type of the European number to L.
    for (let i = 0, cur = outerType; i < len; ++i) {
      let type = types[i]
      if (cur == "L" && type == "1") types[i] = "L"
      else if (isStrong.test(type)) cur = type
    }

    // N1. A sequence of neutrals takes the direction of the
    // surrounding strong text if the text on both sides has the same
    // direction. European and Arabic numbers act as if they were R in
    // terms of their influence on neutrals. Start-of-level-run (sor)
    // and end-of-level-run (eor) are used at level run boundaries.
    // N2. Any remaining neutrals take the embedding direction.
    for (let i = 0; i < len; ++i) {
      if (isNeutral.test(types[i])) {
        let end
        for (end = i + 1; end < len && isNeutral.test(types[end]); ++end) {}
        let before = (i ? types[i-1] : outerType) == "L"
        let after = (end < len ? types[end] : outerType) == "L"
        let replace = before == after ? (before ? "L" : "R") : outerType
        for (let j = i; j < end; ++j) types[j] = replace
        i = end - 1
      }
    }

    // Here we depart from the documented algorithm, in order to avoid
    // building up an actual levels array. Since there are only three
    // levels (0, 1, 2) in an implementation that doesn't take
    // explicit embedding into account, we can build up the order on
    // the fly, without following the level-based algorithm.
    let order = [], m
    for (let i = 0; i < len;) {
      if (countsAsLeft.test(types[i])) {
        // A run of left-counting types becomes one level-0 span
        let start = i
        for (++i; i < len && countsAsLeft.test(types[i]); ++i) {}
        order.push(new BidiSpan(0, start, i))
      } else {
        // A run up to the next "L" is split into level-1 spans and
        // level-2 number spans. For "ltr" every piece is inserted at
        // the same index `at` (so later pieces end up first); for
        // "rtl" the insertion point advances after each piece.
        let pos = i, at = order.length, isRTL = direction == "rtl" ? 1 : 0
        for (++i; i < len && types[i] != "L"; ++i) {}
        for (let j = pos; j < i;) {
          if (countsAsNum.test(types[j])) {
            if (pos < j) { order.splice(at, 0, new BidiSpan(1, pos, j)); at += isRTL }
            let nstart = j
            for (++j; j < i && countsAsNum.test(types[j]); ++j) {}
            order.splice(at, 0, new BidiSpan(2, nstart, j))
            at += isRTL
            pos = j
          } else ++j
        }
        if (pos < i) order.splice(at, 0, new BidiSpan(1, pos, i))
      }
    }
    if (direction == "ltr") {
      // Split leading/trailing whitespace off an outermost level-1
      // span into its own level-0 span
      if (order[0].level == 1 && (m = str.match(/^\s+/))) {
        order[0].from = m[0].length
        order.unshift(new BidiSpan(0, 0, m[0].length))
      }
      if (lst(order).level == 1 && (m = str.match(/\s+$/))) {
        lst(order).to -= m[0].length
        order.push(new BidiSpan(0, len - m[0].length, len))
      }
    }

    return direction == "rtl" ? order.reverse() : order
  }
})()
|
|
207
|
|
// Look up the bidi ordering of a line, computing it from the line's
// text with the given direction and storing it on `line.order` when
// no ordering is cached there yet (i.e. `line.order` is null or
// undefined). The result is false for lines that are fully
// left-to-right, and an array of BidiSpan objects otherwise.
export function getOrder(line, direction) {
  const cached = line.order
  if (cached != null) return cached
  const computed = bidiOrdering(line.text, direction)
  line.order = computed
  return computed
}
|