001 /*
002 * Java Genetic Algorithm Library (jenetics-2.0.2).
003 * Copyright (c) 2007-2014 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.util;
021
022 import static java.lang.String.format;
023 import static java.util.Objects.requireNonNull;
024 import static org.jenetics.internal.util.object.eq;
025
026 import java.io.Serializable;
027 import java.util.Arrays;
028 import java.util.Iterator;
029 import java.util.NoSuchElementException;
030 import java.util.regex.PatternSyntaxException;
031
032 import org.jenetics.internal.util.HashBuilder;
033
034 /**
035 * This class is used for holding the valid characters of an
036 * {@link org.jenetics.CharacterGene}. It is not a character sequence in the
037 * classical sense. The characters of this sequence are sorted and doesn't
038 * contain duplicate values, like a set.
039 *
040 * [code]
041 * final CharSeq cs1 = new CharSeq("abcdeaafg");
042 * final CharSeq cs2 = new CharSeq("gfedcbabb");
043 * assert(cs1.equals(cs2));
044 * [/code]
045 *
046 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
047 * @since 1.0
048 * @version 2.0 — <em>$Date: 2014-04-08 $</em>
049 */
050 public final class CharSeq
051 extends AbstractCharSeq
052 implements
053 CharSequence,
054 ISeq<Character>,
055 Comparable<CharSeq>,
056 Serializable
057 {
058 private static final long serialVersionUID = 2L;
059
060 /**
061 * Create a new (distinct) CharSeq from the given {@code characters}. The
062 * given {@link CharSequence} is sorted and duplicate values are removed
063 *
064 * @see #CharSeq(CharSequence)
065 *
066 * @param characters the characters.
067 * @throws NullPointerException if the {@code characters} are {@code null}.
068 */
069 public CharSeq(final char[] characters) {
070 super(distinct(characters.clone()));
071 }
072
073 /**
074 * Create a new (distinct) CharSeq from the given {@code characters}. The
075 * given {@link CharSequence} is sorted and duplicate values are removed.
076 *
077 * @param characters the characters.
078 * @throws NullPointerException if the {@code characters} are {@code null}.
079 */
080 public CharSeq(final CharSequence characters) {
081 this(toCharArray(characters));
082 }
083
084 private static char[] toCharArray(final CharSequence characters) {
085 requireNonNull(characters, "Characters");
086
087 final char[] chars = new char[characters.length()];
088 for (int i = chars.length; --i >= 0;) {
089 chars[i] = characters.charAt(i);
090 }
091
092 return chars;
093 }
094
095 private static char[] distinct(final char[] chars) {
096 Arrays.sort(chars);
097
098 int size = 0;
099 for (int i = 0, j = 0, n = chars.length; i < n && j < n; ++i) {
100 chars[i] = chars[j];
101 ++size;
102
103 while (j < n && chars[j] == chars[i]) ++j;
104 }
105
106 final char[] array = new char[size];
107 System.arraycopy(chars, 0, array, 0, size);
108 return array;
109 }
110
111 @Override
112 public boolean contains(final Object object) {
113 return object instanceof Character && contains((Character)object);
114 }
115
116 /**
117 * Test whether this character set contains the given character {@code c}.
118 *
119 * @param c the character to test.
120 * @return {@code true} if this character set contains the given character,
121 * {@code false} otherwise.
122 * @throws NullPointerException if the given character {@code c} is
123 * {@code null}.
124 */
125 public boolean contains(final Character c) {
126 return contains(c.charValue());
127 }
128
129 /**
130 * Test whether this character set contains the given character {@code c}.
131 *
132 * @param c the character to test.
133 * @return {@code true} if this character set contains the given character,
134 * {@code false} otherwise.
135 */
136 public boolean contains(final char c) {
137 return Arrays.binarySearch(_characters, c) >= 0;
138 }
139
140 @Override
141 public char charAt(int index) {
142 return _characters[index];
143 }
144
145 @Override
146 public int length() {
147 return _characters.length;
148 }
149
150 @Override
151 public CharSeq subSequence(int start, int end) {
152 return new CharSeq(new String(_characters, start, end - start));
153 }
154
155 /**
156 * Test whether this character set is empty.
157 *
158 * @return {@code true} if this character set is empty, {@code false}
159 * otherwise.
160 */
161 public boolean isEmpty() {
162 return _characters.length == 0;
163 }
164
165 @Override
166 public Iterator<Character> iterator() {
167 return new Iterator<Character>() {
168 private int _pos = 0;
169 @Override public boolean hasNext() {
170 return _pos < _characters.length;
171 }
172 @Override public Character next() {
173 if (!hasNext()) {
174 throw new NoSuchElementException(format(
175 "Index %s is out of range [0, %s)",
176 _pos, _characters.length
177 ));
178 }
179 return _characters[_pos++];
180 }
181 @Override public void remove() {
182 throw new UnsupportedOperationException();
183 }
184 };
185 }
186
187 @Override
188 public int hashCode() {
189 return HashBuilder.of(getClass()).and(_characters).value();
190 }
191
192 @Override
193 public boolean equals(final Object object) {
194 if (object == this) {
195 return true;
196 }
197 if (!(object instanceof CharSeq)) {
198 return false;
199 }
200
201 final CharSeq ch = (CharSeq)object;
202 return eq(_characters, ch._characters);
203 }
204
205 @Override
206 public int compareTo(final CharSeq set) {
207 int result = 0;
208
209 final int n = Math.min(_characters.length, set._characters.length);
210 for (int i = 0; i < n && result == 0; ++i) {
211 result = _characters[i] - set._characters[i];
212 }
213 if (result == 0) {
214 result = _characters.length - set._characters.length;
215 }
216
217 return result;
218 }
219
220 @Override
221 public String toString() {
222 return new String(_characters);
223 }
224
225 /**
226 * Expands the character range for the given {@code pattern}. E.g
227 * {@code a-zA-Z0-1} will return a string containing all upper and lower
228 * case characters (from a to z) and all digits form 0 to 9.
229 *
230 * @param pattern the {@code pattern} to expand.
231 * @return the expanded pattern.
232 * @throws PatternSyntaxException if the pattern could not be expanded.
233 * @throws NullPointerException if the pattern is {@code null}.
234 */
235 public static String expand(final CharSequence pattern) {
236 requireNonNull(pattern, "Pattern");
237 final StringBuilder out = new StringBuilder();
238
239 for (int i = 0, n = pattern.length(); i < n; ++i) {
240 if (pattern.charAt(i) == '\\') {
241 ++i;
242 if (i < pattern.length()) {
243 out.append(pattern.charAt(i));
244 }
245 } else if (pattern.charAt(i) == '-') {
246 if (i <= 0 || i >= (pattern.length() - 1)) {
247 throw new PatternSyntaxException(
248 "Dangling range operator '-'", pattern.toString(),
249 pattern.length() - 1
250 );
251 }
252
253 final String range = expand(
254 pattern.charAt(i - 1),
255 pattern.charAt(i + 1)
256 );
257 out.append(range);
258
259 ++i;
260 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
261 out.append(pattern.charAt(i));
262 }
263 }
264
265 return out.toString();
266 }
267
268 /**
269 * Expands the characters between {@code a} and {@code b}.
270 *
271 * @param a the start character.
272 * @param b the stop character.
273 * @return the expanded characters.
274 */
275 public static String expand(final char a, final char b) {
276 final StringBuilder out = new StringBuilder();
277
278 if (a < b) {
279 char c = a;
280 while (c <= b) {
281 out.append(c);
282 c = (char) (c + 1);
283 }
284 } else if (a > b) {
285 char c = a;
286 while (c >= b) {
287 out.append(c);
288 c = (char) (c - 1);
289 }
290 }
291
292 return out.toString();
293 }
294
295 /**
296 * Expands the character range for the given {@code pattern}. E.g
297 * {@code a-zA-Z0-1} will return a string containing all upper and lower
298 * case characters (from a to z) and all digits form 0 to 9.
299 *
300 * @see #expand(CharSequence)
301 *
302 * @param pattern the {@code pattern} to expand.
303 * @return the expanded pattern.
304 * @throws PatternSyntaxException if the pattern could not be expanded.
305 * @throws NullPointerException if the pattern is {@code null}.
306 */
307 public static CharSeq of(final CharSequence pattern) {
308 return new CharSeq(expand(pattern));
309 }
310
311 /**
312 * Expands the characters between {@code a} and {@code b}.
313 *
314 * @see #expand(char, char)
315 *
316 * @param a the start character.
317 * @param b the stop character.
318 * @return the expanded characters.
319 */
320 public static CharSeq of(final char a, final char b) {
321 return new CharSeq(expand(a, b));
322 }
323
324 /**
325 * Helper method for creating a sequence of characters from the given
326 * {@code CharSequence}. The returned sequence will contain all characters
327 * in the original order.
328 *
329 * @param chars the char sequence to convert.
330 * @return a sequence which will contain all given chars in the original
331 * order.
332 */
333 public static ISeq<Character> toISeq(final CharSequence chars) {
334 final Array<Character> seq = new Array<>(chars.length());
335 for (int i = 0; i < chars.length(); ++i) {
336 seq.set(i, chars.charAt(i));
337 }
338
339 return seq.toISeq();
340 }
341
342 }
|