/**
 * The MIT License (MIT)
 *
 * Copyright (c) 2016 DeNA Co., Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/// <reference path="base.js"/>

/**
 * A class that breaks text into words. The constructor body is empty; the
 * functionality is exposed through static members of this class.
 * @constructor
 */
createjs.WordBreaker = function() {
};

/**
 * Whether to support punctuation characters (U+2000...U+20FF). When this flag
 * is false, the no-break-before table for this row is skipped and a break is
 * always allowed before these characters.
 * @define {boolean}
 */
createjs.WordBreaker.PUNCTUATION_CHARACTERS = false;

/**
 * Whether to support CJK ideographic characters (U+3000...U+ABFF). When this
 * flag is false, the no-break tables for the U+3000...U+30FF row are skipped
 * and a break is always allowed before or after these characters.
 * @define {boolean}
 */
createjs.WordBreaker.IDEOGRAPHIC_CHARACTERS = true;

/**
 * Whether to support full-width alphabet forms (U+FF00...U+FFFF). When this
 * flag is false, the no-break tables for this row are skipped and a break is
 * always allowed before or after these characters.
 * @define {boolean}
 */
createjs.WordBreaker.FULLWIDTH_ALPHABETS = false;
/**
 * Returns whether a word break may be inserted immediately before the
 * specified UTF-16 code unit.
 *
 * The upper byte of the code selects one of three optional lookup tables
 * (0x20xx, 0x30xx, and 0xffxx), each guarded by its own feature flag. A table
 * is a 256-bit bitmap stored as eight 32-bit words and indexed by the lower
 * byte of the code; a set bit forbids a break before that character. Codes
 * without an enabled table always allow a break.
 * @param {number} code
 * @return {boolean}
 * @private
 */
createjs.WordBreaker.canBreakBefore_ = function(code) {
  /// <param type="number" name="code"/>
  /// <returns type="boolean"/>
  var table = null;
  switch (code >> 8) {
    case 0x20:
      if (createjs.WordBreaker.PUNCTUATION_CHARACTERS) {
        // Only U+2026 is marked (bit 6 of the second word).
        table = [
          0x00000000, 0x00000040, 0x00000000, 0x00000000,
          0x00000000, 0x00000000, 0x00000000, 0x00000000
        ];
      }
      break;
    case 0x30:
      if (createjs.WordBreaker.IDEOGRAPHIC_CHARACTERS) {
        // The first word marks, among others, U+3001 and U+3002 (bits 1, 2).
        table = [
          0x0aa2aa06, 0x00000000, 0x000002aa, 0x00000008,
          0x000000a8, 0x000002aa, 0x00000008, 0x100000a8
        ];
      }
      break;
    case 0xff:
      if (createjs.WordBreaker.FULLWIDTH_ALPHABETS) {
        // Marks U+FF0C, U+FF0E, and U+FF1F (bits 12, 14, and 31).
        table = [
          0x80005000, 0x00000000, 0x00000000, 0x00000000,
          0x00000000, 0x00000000, 0x00000000, 0x00000000
        ];
      }
      break;
  }
  if (!table) {
    return true;
  }
  // The upper three bits of the lower byte pick the word, the lower five bits
  // pick the bit inside that word.
  var offset = code & 0xff;
  return (table[offset >> 5] & (1 << (offset & 0x1f))) == 0;
};
/**
 * Returns whether a word break may be inserted immediately after the
 * specified UTF-16 code unit.
 *
 * The upper byte of the code selects one of two optional lookup tables
 * (0x30xx and 0xffxx), each guarded by its own feature flag. A table is a
 * 256-bit bitmap stored as eight 32-bit words and indexed by the lower byte of
 * the code; a set bit forbids a break after that character. Codes without an
 * enabled table always allow a break.
 * @param {number} code
 * @return {boolean}
 * @private
 */
createjs.WordBreaker.canBreakAfter_ = function(code) {
  /// <param type="number" name="code"/>
  /// <returns type="boolean"/>
  var table = null;
  switch (code >> 8) {
    case 0x30:
      if (createjs.WordBreaker.IDEOGRAPHIC_CHARACTERS) {
        // Marks U+3008, U+300A, U+300C, U+300E, U+3010, U+3014, U+3016,
        // U+3018, and U+301A.
        table = [
          0x05515500, 0x00000000, 0x00000000, 0x00000000,
          0x00000000, 0x00000000, 0x00000000, 0x00000000
        ];
      }
      break;
    case 0xff:
      if (createjs.WordBreaker.FULLWIDTH_ALPHABETS) {
        // Marks U+FF08, U+FF3B, and U+FF5B.
        table = [
          0x00000100, 0x08000000, 0x08000000, 0x00000000,
          0x00000000, 0x00000000, 0x00000000, 0x00000000
        ];
      }
      break;
  }
  if (!table) {
    return true;
  }
  // The upper three bits of the lower byte pick the word, the lower five bits
  // pick the bit inside that word.
  var offset = code & 0xff;
  return (table[offset >> 5] & (1 << (offset & 0x1f))) == 0;
};
/**
 * Breaks text into words. The text is split into a list of segments at every
 * position where a line break may be inserted:
 *   - before any code unit that is less than or equal to 0x20, so such a
 *     character starts the segment that follows it (e.g. "a b" yields "a" and
 *     " b"); and
 *   - before a code unit in U+3000...U+ABFF, unless the previous code unit
 *     forbids a break after it or this one forbids a break before it.
 * The first code unit never starts a new segment, and concatenating the
 * returned segments reproduces the input text.
 *
 * The original description calls this a subset of Unicode UAX #29.
 * NOTE(review): line-break opportunities are the subject of UAX #14; confirm
 * which specification was intended.
 * @param {string} text
 * @return {Array.<string>}
 */
createjs.WordBreaker.breakText = function(text) {
  /// <param type="string" name="text"/>
  /// <returns type="Array" elementType="string"/>
  createjs.assert(text.length > 0);
  var length = text.length;
  var segments = [];
  var head = 0;
  var last = text.charCodeAt(0);
  for (var index = 1; index < length; ++index) {
    var current = text.charCodeAt(index);
    // The "after" predicate is evaluated before the "before" predicate, and
    // only for code units inside the U+3000...U+ABFF range.
    var boundary = current <= 0x20 ||
        (0x3000 <= current && current < 0xac00 &&
         createjs.WordBreaker.canBreakAfter_(last) &&
         createjs.WordBreaker.canBreakBefore_(current));
    if (boundary) {
      // |head| always holds an earlier index than |index|, so the segment
      // emitted here is never empty.
      segments.push(text.substring(head, index));
      head = index;
    }
    last = current;
  }
  if (head < length) {
    segments.push(text.substring(head));
  }
  return segments;
};