001 /*
002 * Java Genetic Algorithm Library (jenetics-1.6.0).
003 * Copyright (c) 2007-2014 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.util;
021
022 import static java.lang.String.format;
023 import static java.util.Objects.requireNonNull;
024 import static org.jenetics.internal.util.object.eq;
025
026 import java.io.Serializable;
027 import java.util.Arrays;
028 import java.util.Iterator;
029 import java.util.NoSuchElementException;
030 import java.util.regex.PatternSyntaxException;
031
032 import javolution.lang.Immutable;
033
034 import org.jenetics.internal.util.HashBuilder;
035
036 /**
037 * This class is used for holding the valid characters of an
038 * {@link org.jenetics.CharacterGene}. It is not a character sequence in the
039 * classical sense. The characters of this sequence are sorted and doesn't
040 * contain duplicate values, like a set.
041 *
042 * [code]
043 * final CharSeq cs1 = new CharSeq("abcdeaafg");
044 * final CharSeq cs2 = new CharSeq("gfedcbabb");
045 * assert(cs1.equals(cs2));
046 * [/code]
047 *
048 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
049 * @since 1.0
050 * @version 1.6 — <em>$Date: 2014-03-01 $</em>
051 */
052 public final class CharSeq
053 extends AbstractCharSeq
054 implements
055 CharSequence,
056 ISeq<Character>,
057 Comparable<CharSeq>,
058 Immutable,
059 Serializable
060 {
061 private static final long serialVersionUID = 2L;
062
063 /**
064 * Create a new (distinct) CharSeq from the given {@code characters}. The
065 * given {@link CharSequence} is sorted and duplicate values are removed
066 *
067 * @see #CharSeq(CharSequence)
068 *
069 * @param characters the characters.
070 * @throws NullPointerException if the {@code characters} are {@code null}.
071 */
072 public CharSeq(final char[] characters) {
073 super(distinct(characters.clone()));
074 }
075
076 /**
077 * Create a new (distinct) CharSeq from the given {@code characters}. The
078 * given {@link CharSequence} is sorted and duplicate values are removed.
079 *
080 * @param characters the characters.
081 * @throws NullPointerException if the {@code characters} are {@code null}.
082 */
083 public CharSeq(final CharSequence characters) {
084 this(toCharArray(characters));
085 }
086
087 private static char[] toCharArray(final CharSequence characters) {
088 requireNonNull(characters, "Characters");
089
090 final char[] chars = new char[characters.length()];
091 for (int i = 0; i < characters.length(); ++i) {
092 chars[i] = characters.charAt(i);
093 }
094
095 return chars;
096 }
097
098 private static char[] distinct(final char[] chars) {
099 char[] result = chars;
100
101 if (chars.length > 0) {
102 Arrays.sort(result);
103
104 int nextIndex = 0;
105 int count = 1;
106 char last = result[0];
107
108 for (int i = 1; i < result.length; ++i) {
109 while (nextIndex < result.length && result[nextIndex] == last) {
110 ++nextIndex;
111 }
112 if (nextIndex < result.length) {
113 last = result[nextIndex];
114 result[i] = last;
115 ++count;
116 }
117 }
118
119 char[] array = new char[count];
120 System.arraycopy(result, 0, array, 0, count);
121 result = array;
122 }
123
124 return result;
125 }
126
127 @Override
128 public boolean contains(final Object object) {
129 boolean contains = false;
130 if (object instanceof Character) {
131 contains = contains((Character)object);
132 }
133
134 return contains;
135 }
136
137 /**
138 * Test whether this character set contains the given character {@code c}.
139 *
140 * @param c the character to test.
141 * @return {@code true} if this character set contains the given character,
142 * {@code false} otherwise.
143 * @throws NullPointerException if the given character {@code c} is
144 * {@code null}.
145 */
146 public boolean contains(final Character c) {
147 return contains(c.charValue());
148 }
149
150 /**
151 * Test whether this character set contains the given character {@code c}.
152 *
153 * @param c the character to test.
154 * @return {@code true} if this character set contains the given character,
155 * {@code false} otherwise.
156 */
157 public boolean contains(final char c) {
158 return Arrays.binarySearch(_characters, c) >= 0;
159 }
160
161 @Override
162 public char charAt(int index) {
163 return _characters[index];
164 }
165
166 @Override
167 public int length() {
168 return _characters.length;
169 }
170
171 @Override
172 public CharSeq subSequence(int start, int end) {
173 return new CharSeq(new String(_characters, start, end - start));
174 }
175
176 /**
177 * Test whether this character set is empty.
178 *
179 * @return {@code true} if this character set is empty, {@code false}
180 * otherwise.
181 */
182 public boolean isEmpty() {
183 return _characters.length == 0;
184 }
185
186 @Override
187 public Iterator<Character> iterator() {
188 return new Iterator<Character>() {
189 private int _pos = 0;
190 @Override public boolean hasNext() {
191 return _pos < _characters.length;
192 }
193 @Override public Character next() {
194 if (!hasNext()) {
195 throw new NoSuchElementException(format(
196 "Index %s is out of range [0, %s)",
197 _pos, _characters.length
198 ));
199 }
200 return _characters[_pos++];
201 }
202 @Override public void remove() {
203 throw new UnsupportedOperationException();
204 }
205 };
206 }
207
208 @Override
209 public int hashCode() {
210 return HashBuilder.of(getClass()).and(_characters).value();
211 }
212
213 @Override
214 public boolean equals(final Object object) {
215 if (object == this) {
216 return true;
217 }
218 if (!(object instanceof CharSeq)) {
219 return false;
220 }
221
222 final CharSeq ch = (CharSeq)object;
223 return eq(_characters, ch._characters);
224 }
225
226 @Override
227 public int compareTo(final CharSeq set) {
228 int result = 0;
229
230 final int n = Math.min(_characters.length, set._characters.length);
231 for (int i = 0; i < n && result == 0; ++i) {
232 result = _characters[i] - set._characters[i];
233 }
234 if (result == 0) {
235 result = _characters.length - set._characters.length;
236 }
237
238 return result;
239 }
240
241 @Override
242 public String toString() {
243 return new String(_characters);
244 }
245
246 /**
247 * Expands the character range for the given {@code pattern}. E.g
248 * {@code a-zA-Z0-1} will return a string containing all upper and lower
249 * case characters (from a to z) and all digits form 0 to 9.
250 *
251 * @param pattern the {@code pattern} to expand.
252 * @return the expanded pattern.
253 * @throws PatternSyntaxException if the pattern could not be expanded.
254 * @throws NullPointerException if the pattern is {@code null}.
255 */
256 public static String expand(final CharSequence pattern) {
257 requireNonNull(pattern, "Pattern");
258 final StringBuilder out = new StringBuilder();
259
260 for (int i = 0, n = pattern.length(); i < n; ++i) {
261 if (pattern.charAt(i) == '\\') {
262 ++i;
263 if (i < pattern.length()) {
264 out.append(pattern.charAt(i));
265 }
266 } else if (pattern.charAt(i) == '-') {
267 if (i <= 0 || i >= (pattern.length() - 1)) {
268 throw new PatternSyntaxException(
269 "Dangling range operator '-'", pattern.toString(),
270 pattern.length() - 1
271 );
272 }
273
274 final String range = expand(
275 pattern.charAt(i - 1),
276 pattern.charAt(i + 1)
277 );
278 out.append(range);
279
280 ++i;
281 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
282 out.append(pattern.charAt(i));
283 }
284 }
285
286 return out.toString();
287 }
288
289 /**
290 * Expands the characters between {@code a} and {@code b}.
291 *
292 * @param a the start character.
293 * @param b the stop character.
294 * @return the expanded characters.
295 */
296 public static String expand(final char a, final char b) {
297 final StringBuilder out = new StringBuilder();
298
299 if (a < b) {
300 char c = a;
301 while (c <= b) {
302 out.append(c);
303 c = (char) (c + 1);
304 }
305 } else if (a > b) {
306 char c = a;
307 while (c >= b) {
308 out.append(c);
309 c = (char) (c - 1);
310 }
311 }
312
313 return out.toString();
314 }
315
316 /**
317 * Expands the character range for the given {@code pattern}. E.g
318 * {@code a-zA-Z0-1} will return a string containing all upper and lower
319 * case characters (from a to z) and all digits form 0 to 9.
320 *
321 * @see #expand(CharSequence)
322 *
323 * @param pattern the {@code pattern} to expand.
324 * @return the expanded pattern.
325 * @throws PatternSyntaxException if the pattern could not be expanded.
326 * @throws NullPointerException if the pattern is {@code null}.
327 */
328 public static CharSeq of(final CharSequence pattern) {
329 return new CharSeq(expand(pattern));
330 }
331
332 /**
333 * @deprecated Use {@link #of(CharSequence)} instead.
334 */
335 @Deprecated
336 public static CharSeq valueOf(final CharSequence pattern) {
337 return of(pattern);
338 }
339
340 /**
341 * Expands the characters between {@code a} and {@code b}.
342 *
343 * @see #expand(char, char)
344 *
345 * @param a the start character.
346 * @param b the stop character.
347 * @return the expanded characters.
348 */
349 public static CharSeq of(final char a, final char b) {
350 return new CharSeq(expand(a, b));
351 }
352
353 /**
354 * @deprecated Use {@link #of(char, char)} instead.
355 */
356 @Deprecated
357 public static CharSeq valueOf(final char a, final char b) {
358 return of(a, b);
359 }
360
361 /**
362 * Helper method for creating a sequence of characters from the given
363 * {@code CharSequence}. The returned sequence will contain all characters
364 * in the original order.
365 *
366 * @param chars the char sequence to convert.
367 * @return a sequence which will contain all given chars in the original
368 * order.
369 */
370 public static ISeq<Character> toISeq(final CharSequence chars) {
371 final Array<Character> seq = new Array<>(chars.length());
372 for (int i = 0; i < chars.length(); ++i) {
373 seq.set(i, chars.charAt(i));
374 }
375
376 return seq.toISeq();
377 }
378
379 }
|