/* * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ /* * Copyright 1999-2002,2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sun.org.apache.xerces.internal.impl.xpath.regex; import java.text.CharacterIterator; /** * @xerces.internal * */ public final class REUtil { private REUtil() { } static final int composeFromSurrogates(int high, int low) { return 0x10000 + ((high-0xd800)<<10) + low-0xdc00; } static final boolean isLowSurrogate(int ch) { return (ch & 0xfc00) == 0xdc00; } static final boolean isHighSurrogate(int ch) { return (ch & 0xfc00) == 0xd800; } static final String decomposeToSurrogates(int ch) { char[] chs = new char[2]; ch -= 0x10000; chs[0] = (char)((ch>>10)+0xd800); chs[1] = (char)((ch&0x3ff)+0xdc00); return new String(chs); } static final String substring(CharacterIterator iterator, int begin, int end) { char[] src = new char[end-begin]; for (int i = 0; i < src.length; i ++) src[i] = iterator.setIndex(i+begin); return new String(src); } // ================================================================ static final int getOptionValue(int ch) { int ret = 0; switch (ch) { case 'i': ret = RegularExpression.IGNORE_CASE; break; case 'm': ret = RegularExpression.MULTIPLE_LINES; break; case 's': ret = RegularExpression.SINGLE_LINE; break; case 'x': ret = RegularExpression.EXTENDED_COMMENT; break; case 'u': ret = RegularExpression.USE_UNICODE_CATEGORY; break; case 'w': ret = RegularExpression.UNICODE_WORD_BOUNDARY; break; case 'F': ret = RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION; break; case 'H': ret = RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION; break; case 'X': ret = RegularExpression.XMLSCHEMA_MODE; break; case ',': ret = RegularExpression.SPECIAL_COMMA; break; default: } return ret; } static final int parseOptions(String opts) throws ParseException { if (opts == null) return 0; int options = 0; for (int i = 0; i < opts.length(); i ++) { int v = getOptionValue(opts.charAt(i)); if (v == 0) throw new ParseException("Unknown Option: "+opts.substring(i), -1); options |= v; } return options; } static final String createOptionString(int options) { StringBuffer sb = new StringBuffer(9); if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) sb.append((char)'F'); if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) sb.append((char)'H'); if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) sb.append((char)'X'); if ((options & RegularExpression.IGNORE_CASE) != 0) sb.append((char)'i'); if ((options & RegularExpression.MULTIPLE_LINES) != 0) sb.append((char)'m'); if ((options & RegularExpression.SINGLE_LINE) != 0) sb.append((char)'s'); if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) sb.append((char)'u'); if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) sb.append((char)'w'); if ((options & RegularExpression.EXTENDED_COMMENT) != 0) sb.append((char)'x'); if ((options & RegularExpression.SPECIAL_COMMA) != 0) sb.append((char)','); return sb.toString().intern(); } // ================================================================ static String stripExtendedComment(String regex) { int len = regex.length(); StringBuffer buffer = new StringBuffer(len); int offset = 0; while (offset < len) { int ch = regex.charAt(offset++); // Skips a white space. if (ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r' || ch == ' ') continue; if (ch == '#') { // Skips chracters between '#' and a line end. while (offset < len) { ch = regex.charAt(offset++); if (ch == '\r' || ch == '\n') break; } continue; } int next; // Strips an escaped white space. if (ch == '\\' && offset < len) { if ((next = regex.charAt(offset)) == '#' || next == '\t' || next == '\n' || next == '\f' || next == '\r' || next == ' ') { buffer.append((char)next); offset ++; } else { // Other escaped character. buffer.append((char)'\\'); buffer.append((char)next); offset ++; } } else // As is. buffer.append((char)ch); } return buffer.toString(); } // ================================================================ /** * Sample entry. *