CharSeq.java

/*
 * Java Genetic Algorithm Library (@__identifier__@).
 * Copyright (c) @__year__@ Franz Wilhelmstötter
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author:
 *    Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
 */
package org.jenetics.util;

import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.jenetics.internal.util.object.eq;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.PatternSyntaxException;

import javolution.lang.Immutable;

import org.jenetics.internal.util.HashBuilder;

/**
 * This class is used for holding the valid characters of an
 * {@link org.jenetics.CharacterGene}. It is not a character sequence in the
 * classical sense. The characters of this sequence are sorted and doesn't
 * contain duplicate values, like a set.
 *
 * [code]
 * final CharSeq cs1 = new CharSeq("abcdeaafg");
 * final CharSeq cs2 = new CharSeq("gfedcbabb");
 * assert(cs1.equals(cs2));
 * [/code]
 *
 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
 * @since 1.0
 * @version 1.6 &mdash; <em>$Date: 2014-03-01 $</em>
 */
public final class CharSeq
	extends AbstractCharSeq
	implements
		CharSequence,
		ISeq<Character>,
		Comparable<CharSeq>,
		Immutable,
		Serializable
{
	private static final long serialVersionUID = 2L;

	/**
	 * Create a new (distinct) CharSeq from the given {@code characters}. The
	 * given {@link CharSequence} is sorted and duplicate values are removed
	 *
	 * @see #CharSeq(CharSequence)
	 *
	 * @param characters the characters.
	 * @throws NullPointerException if the {@code characters} are {@code null}.
	 */
	public CharSeq(final char[] characters) {
		super(distinct(characters.clone()));
	}

	/**
	 * Create a new (distinct) CharSeq from the given {@code characters}. The
	 * given {@link CharSequence} is sorted and duplicate values are removed.
	 *
	 * @param characters the characters.
	 * @throws NullPointerException if the {@code characters} are {@code null}.
	 */
	public CharSeq(final CharSequence characters) {
		this(toCharArray(characters));
	}

	private static char[] toCharArray(final CharSequence characters) {
		requireNonNull(characters, "Characters");

		final char[] chars = new char[characters.length()];
		for (int i = 0; i < characters.length(); ++i) {
			chars[i] = characters.charAt(i);
		}

		return chars;
	}

	private static char[] distinct(final char[] chars) {
		char[] result = chars;

		if (chars.length > 0) {
			Arrays.sort(result);

			int nextIndex = 0;
			int count = 1;
			char last = result[0];

			for (int i = 1; i < result.length; ++i) {
				while (nextIndex < result.length && result[nextIndex] == last) {
					++nextIndex;
				}
				if (nextIndex < result.length) {
					last = result[nextIndex];
					result[i] = last;
					++count;
				}
			}

			char[] array = new char[count];
			System.arraycopy(result, 0, array, 0, count);
			result = array;
		}

		return result;
	}

	@Override
	public boolean contains(final Object object) {
		boolean contains = false;
		if (object instanceof Character) {
			contains = contains((Character)object);
		}

		return contains;
	}

	/**
	 * Test whether this character set contains the given character {@code c}.
	 *
	 * @param c the character to test.
	 * @return {@code true} if this character set contains the given character,
	 *          {@code false} otherwise.
	 * @throws NullPointerException if the given character {@code c} is
	 *          {@code null}.
	 */
	public boolean contains(final Character c) {
		return contains(c.charValue());
	}

	/**
	 * Test whether this character set contains the given character {@code c}.
	 *
	 * @param c the character to test.
	 * @return {@code true} if this character set contains the given character,
	 *          {@code false} otherwise.
	 */
	public boolean contains(final char c) {
		return Arrays.binarySearch(_characters, c) >= 0;
	}

	@Override
	public char charAt(int index) {
		return _characters[index];
	}

	@Override
	public int length() {
		return _characters.length;
	}

	@Override
	public CharSeq subSequence(int start, int end) {
		return new CharSeq(new String(_characters, start, end - start));
	}

	/**
	 * Test whether this character set is empty.
	 *
	 * @return {@code true} if this character set is empty, {@code false}
	 *          otherwise.
	 */
	public boolean isEmpty() {
		return _characters.length == 0;
	}

	@Override
	public Iterator<Character> iterator() {
		return new Iterator<Character>() {
			private int _pos = 0;
			@Override public boolean hasNext() {
				return _pos < _characters.length;
			}
			@Override public Character next() {
				if (!hasNext()) {
					throw new NoSuchElementException(format(
						"Index %s is out of range [0, %s)",
						_pos, _characters.length
					));
				}
				return _characters[_pos++];
			}
			@Override public void remove() {
				throw new UnsupportedOperationException();
			}
		};
	}

	@Override
	public int hashCode() {
		return HashBuilder.of(getClass()).and(_characters).value();
	}

	@Override
	public boolean equals(final Object object) {
		if (object == this) {
			return true;
		}
		if (!(object instanceof CharSeq)) {
			return false;
		}

		final CharSeq ch = (CharSeq)object;
		return eq(_characters, ch._characters);
	}

	@Override
	public int compareTo(final CharSeq set) {
		int result = 0;

		final int n = Math.min(_characters.length, set._characters.length);
		for (int i = 0; i < n && result == 0; ++i) {
			result = _characters[i] - set._characters[i];
		}
		if (result == 0) {
			result = _characters.length - set._characters.length;
		}

		return result;
	}

	@Override
	public String toString() {
		return new String(_characters);
	}

	/**
	 * Expands the character range for the given {@code pattern}. E.g
	 * {@code a-zA-Z0-1} will return a string containing all upper and lower
	 * case characters (from a to z) and all digits form 0 to 9.
	 *
	 * @param pattern the {@code pattern} to expand.
	 * @return the expanded pattern.
	 * @throws PatternSyntaxException if the pattern could not be expanded.
	 * @throws NullPointerException if the pattern is {@code null}.
	 */
	public static String expand(final CharSequence pattern) {
		requireNonNull(pattern, "Pattern");
		final StringBuilder out = new StringBuilder();

		for (int i = 0, n = pattern.length(); i < n; ++i) {
			if (pattern.charAt(i) == '\\') {
				++i;
				if (i < pattern.length()) {
					out.append(pattern.charAt(i));
				}
			} else if (pattern.charAt(i) == '-') {
				if (i <= 0 || i >= (pattern.length() - 1)) {
					throw new PatternSyntaxException(
						"Dangling range operator '-'", pattern.toString(),
						pattern.length() - 1
					);
				}

				final String range = expand(
					pattern.charAt(i - 1),
					pattern.charAt(i + 1)
				);
				out.append(range);

				++i;
			} else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
				out.append(pattern.charAt(i));
			}
		}

		return out.toString();
	}

	/**
	 * Expands the characters between {@code a} and {@code b}.
	 *
	 * @param a the start character.
	 * @param b the stop character.
	 * @return the expanded characters.
	 */
	public static String expand(final char a, final char b) {
		final StringBuilder out = new StringBuilder();

		if (a < b) {
			char c = a;
			while (c <= b) {
				out.append(c);
				c = (char) (c + 1);
			}
		} else if (a > b) {
			char c = a;
			while (c >= b) {
				out.append(c);
				c = (char) (c - 1);
			}
		}

		return out.toString();
	}

	/**
	 * Expands the character range for the given {@code pattern}. E.g
	 * {@code a-zA-Z0-1} will return a string containing all upper and lower
	 * case characters (from a to z) and all digits form 0 to 9.
	 *
	 * @see #expand(CharSequence)
	 *
	 * @param pattern the {@code pattern} to expand.
	 * @return the expanded pattern.
	 * @throws PatternSyntaxException if the pattern could not be expanded.
	 * @throws NullPointerException if the pattern is {@code null}.
	 */
	public static CharSeq of(final CharSequence pattern) {
		return new CharSeq(expand(pattern));
	}

	/**
	 * @deprecated Use {@link #of(CharSequence)} instead.
	 */
	@Deprecated
	public static CharSeq valueOf(final CharSequence pattern) {
		return of(pattern);
	}

	/**
	 * Expands the characters between {@code a} and {@code b}.
	 *
	 * @see #expand(char, char)
	 *
	 * @param a the start character.
	 * @param b the stop character.
	 * @return the expanded characters.
	 */
	public static CharSeq of(final char a, final char b) {
		return new CharSeq(expand(a, b));
	}

	/**
	 * @deprecated Use {@link #of(char, char)} instead.
	 */
	@Deprecated
	public static CharSeq valueOf(final char a, final char b) {
		return of(a, b);
	}

	/**
	 * Helper method for creating a sequence of characters from the given
	 * {@code CharSequence}. The returned sequence will contain all characters
	 * in the original order.
	 *
	 * @param chars the char sequence to convert.
	 * @return a sequence which will contain all given chars in the original
	 *         order.
	 */
	public static ISeq<Character> toISeq(final CharSequence chars) {
		final Array<Character> seq = new Array<>(chars.length());
		for (int i = 0; i < chars.length(); ++i) {
			seq.set(i, chars.charAt(i));
		}

		return seq.toISeq();
	}

}