1 /**
  2  * The MIT License (MIT)
  3  *
  4  * Copyright (c) 2016 DeNA Co., Ltd.
  5  *
  6  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7  * of this software and associated documentation files (the "Software"), to deal
  8  * in the Software without restriction, including without limitation the rights
  9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10  * copies of the Software, and to permit persons to whom the Software is
 11  * furnished to do so, subject to the following conditions:
 12  *
 13  * The above copyright notice and this permission notice shall be included in
 14  * all copies or substantial portions of the Software.
 15  *
 16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22  * SOFTWARE.
 23  */
 24 
 25 /// <reference path="base.js"/>
 26 
 27 /**
 28  * A class that breaks text into words.
 29  * @constructor
 30  */
 31 createjs.WordBreaker = function() {
 32 };
 33 
 34 /**
 35  * Whether to support punctuation characters (U+2000...U+20FF).
 36  * @define {boolean}
 37  */
 38 createjs.WordBreaker.PUNCTUATION_CHARACTERS = false;
 39 
 40 /**
 41  * Whether to support CJK ideographic characters (U+3000...U+ABFF).
 42  * @define {boolean}
 43  */
 44 createjs.WordBreaker.IDEOGRAPHIC_CHARACTERS = true;
 45 
 46 /**
 47  * Whether to support full-width alphabet forms (U+FF00...U+FFFF).
 48  * @define {boolean}
 49  */
 50 createjs.WordBreaker.FULLWIDTH_ALPHABETS = false;
 51 
 52 /**
 53  * Return whether the specified code allows inserting a word break before it.
 54  * @param {number} code
 55  * @return {boolean}
 56  * @private
 57  */
 58 createjs.WordBreaker.canBreakBefore_ = function(code) {
 59   /// <param type="number" name="code"/>
 60   /// <returns type="boolean"/>
 61   var high = code >> 8;
 62   var low = code & 0xff;
 63   if (high == 0x20) {
 64     if (createjs.WordBreaker.PUNCTUATION_CHARACTERS) {
 65       var NO_BREAK_BEFORE_2000 = [
 66         0x00000000, 0x00000040, 0x00000000, 0x00000000,
 67         0x00000000, 0x00000000, 0x00000000, 0x00000000
 68       ];
 69       return (NO_BREAK_BEFORE_2000[low >> 5] & (1 << (low & 0x1f))) == 0;
 70     }
 71   } else if (high == 0x30) {
 72     if (createjs.WordBreaker.IDEOGRAPHIC_CHARACTERS) {
 73       var NO_BREAK_BEFORE_3000 = [
 74         0x0aa2aa06, 0x00000000, 0x000002aa, 0x00000008,
 75         0x000000a8, 0x000002aa, 0x00000008, 0x100000a8
 76       ];
 77       return (NO_BREAK_BEFORE_3000[low >> 5] & (1 << (low & 0x1f))) == 0;
 78     }
 79   } else if (high == 0xff) {
 80     if (createjs.WordBreaker.FULLWIDTH_ALPHABETS) {
 81       var NO_BREAK_BEFORE_FF00 = [
 82         0x80005000, 0x00000000, 0x00000000, 0x00000000,
 83         0x00000000, 0x00000000, 0x00000000, 0x00000000
 84       ];
 85       return (NO_BREAK_BEFORE_FF00[low >> 5] & (1 << (low & 0x1f))) == 0;
 86     }
 87   }
 88   return true;
 89 };
 90 
 91 /**
 92  * Return whether the specified code allows inserting a word break after it.
 93  * @param {number} code
 94  * @return {boolean}
 95  * @private
 96  */
 97 createjs.WordBreaker.canBreakAfter_ = function(code) {
 98   /// <param type="number" name="code"/>
 99   /// <returns type="boolean"/>
100   var high = code >> 8;
101   var low = code & 0xff;
102   if (high == 0x30) {
103     if (createjs.WordBreaker.IDEOGRAPHIC_CHARACTERS) {
104       var NO_BREAK_AFTER_3000 = [
105         0x05515500, 0x00000000, 0x00000000, 0x00000000,
106         0x00000000, 0x00000000, 0x00000000, 0x00000000
107       ];
108       return (NO_BREAK_AFTER_3000[low >> 5] & (1 << (low & 0x1f))) == 0;
109     }
110   } else if (high == 0xff) {
111     if (createjs.WordBreaker.FULLWIDTH_ALPHABETS) {
112       var NO_BREAK_AFTER_FF00 = [
113         0x00000100, 0x08000000, 0x08000000, 0x00000000,
114         0x00000000, 0x00000000, 0x00000000, 0x00000000
115       ];
116       return (NO_BREAK_AFTER_FF00[low >> 5] & (1 << (low & 0x1f))) == 0;
117     }
118   }
119   return true;
120 };
121 
/**
 * Splits text into consecutive segments at the positions where a line break
 * may be inserted. In brief, this method implements a subset of Unicode
 * UAX #29.
 *
 * A boundary is placed before a code unit when either:
 *  - the code unit is U+0020 or lower, so a space or control character begins
 *    the segment that follows it; or
 *  - the code unit lies in U+3000...U+ABFF and both canBreakAfter_() for the
 *    preceding code unit and canBreakBefore_() for this one allow a break.
 * The first code unit never starts a new boundary, and the returned segments
 * concatenate back to the original text.
 * @param {string} text The text to split. It is asserted to be non-empty.
 * @return {Array.<string>} The segments in their original order.
 */
createjs.WordBreaker.breakText = function(text) {
  /// <param type="string" name="text"/>
  /// <returns type="Array" elementType="string"/>
  createjs.assert(text.length > 0);
  var length = text.length;
  var segments = [];
  var head = 0;
  var last = text.charCodeAt(0);
  for (var index = 1; index < length; ++index) {
    var current = text.charCodeAt(index);
    var boundary = false;
    if (current <= 0x20) {
      boundary = true;
    } else if (current >= 0x3000 && current < 0xac00) {
      // The "after" rule is evaluated first; the "before" rule is consulted
      // only when the preceding character permits a break.
      boundary = createjs.WordBreaker.canBreakAfter_(last) &&
          createjs.WordBreaker.canBreakBefore_(current);
    }
    if (boundary) {
      // |head| always trails |index|, so the emitted segment is never empty.
      segments.push(text.substring(head, index));
      head = index;
    }
    last = current;
  }
  if (head < length) {
    segments.push(text.substring(head));
  }
  return segments;
};
160